From 6e5cc698ef68001c6b6688e95b55efac98c01021 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Fri, 30 Aug 2019 13:10:43 +0200
Subject: [PATCH 01/22] -Solvent: Adding parameters to the conf

---
 conf/hermes.conf | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/conf/hermes.conf b/conf/hermes.conf
index e9a1539..11d0ff3 100755
--- a/conf/hermes.conf
+++ b/conf/hermes.conf
@@ -135,12 +135,13 @@ shipping_port_processors = 0
 livestock_processors = 0
 crop_operations_processors = 0
 crop_fertilizers_processors = 0
-agricultural_machinery_processors = 1
+agricultural_machinery_processors = 0
 residential_processors = 0
 recreational_boats_processors = 0
 point_sources_processors = 0
 traffic_processors = 0
 traffic_area_processors = 0
+solvent_processors = 1
 
 
 [SHAPEFILES]
@@ -345,3 +346,13 @@ traffic_area_small_cities_ef_file = <input_dir>/traffic_area/ef/small_cities.csv
 small_cities_monthly_profile = <input_dir>/profiles/temporal/traffic_area/small_cities_monthly_profiles.csv
 small_cities_weekly_profile = <input_dir>/profiles/temporal/traffic_area/small_cities_weekly_profiles.csv
 small_cities_hourly_profile = <input_dir>/profiles/temporal/traffic_area/small_cities_hourly_profiles.csv
+
+[SOLVENT]
+solvent_pollutants = nox_no2
+solvent_description_path = <input_dir>/solvent/solvent_desctiption.csv
+solvent_yearly_emissions_by_nut2_path = <input_dir>/solvent/solvent_nut2.csv
+solvent_shapefile = <input_dir>/solvent/solvent_shapefile.shp
+solvent_monthly_profile = <input_dir>/profiles/temporal/solvent/solvent_monthly_profiles.csv
+solvent_weekly_profile = <input_dir>/profiles/temporal/solvent/solvent_weekly_profiles.csv
+solvent_hourly_profile = <input_dir>/profiles/temporal/solvent/solvent_hourly_profiles.csv
+solvent_speciation_profiles = <input_dir>/profiles/speciation/solvent/solvent_base.csv
-- 
GitLab


From 516562b9acbfb8d3acb00feefed3bfebc6ca66d5 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Fri, 30 Aug 2019 14:07:41 +0200
Subject: [PATCH 02/22] -Solvent: Adding parameters to the conf

---
 conf/hermes.conf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/hermes.conf b/conf/hermes.conf
index 11d0ff3..268a50c 100755
--- a/conf/hermes.conf
+++ b/conf/hermes.conf
@@ -348,7 +348,7 @@ small_cities_weekly_profile = <input_dir>/profiles/temporal/traffic_area/small_c
 small_cities_hourly_profile = <input_dir>/profiles/temporal/traffic_area/small_cities_hourly_profiles.csv
 
 [SOLVENT]
-solvent_pollutants = nox_no2
+solvent_pollutants = mnvoc
 solvent_description_path = <input_dir>/solvent/solvent_desctiption.csv
 solvent_yearly_emissions_by_nut2_path = <input_dir>/solvent/solvent_nut2.csv
 solvent_shapefile = <input_dir>/solvent/solvent_shapefile.shp
-- 
GitLab


From 9caa0ea9600f6d93e5e8dd0e0302441d216322e7 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Fri, 30 Aug 2019 15:45:39 +0200
Subject: [PATCH 03/22] Starting Solvent sector

---
 conf/hermes.conf                      | 22 +++++++++---------
 hermesv3_bu/config/config.py          | 16 ++++++++++++++
 hermesv3_bu/sectors/sector_manager.py | 13 ++++++++++-
 hermesv3_bu/sectors/solvent_sector.py | 32 +++++++++++++++++++++++++++
 4 files changed, 71 insertions(+), 12 deletions(-)
 create mode 100755 hermesv3_bu/sectors/solvent_sector.py

diff --git a/conf/hermes.conf b/conf/hermes.conf
index 268a50c..fc835e3 100755
--- a/conf/hermes.conf
+++ b/conf/hermes.conf
@@ -141,12 +141,13 @@ recreational_boats_processors = 0
 point_sources_processors = 0
 traffic_processors = 0
 traffic_area_processors = 0
-solvent_processors = 1
+solvents_processors = 1
 
 
 [SHAPEFILES]
 nuts3_shapefile = <input_dir>/shapefiles/nuts3/nuts3.shp
 nuts2_shapefile = <input_dir>/shapefiles/nuts2/nuts2.shp
+land_uses_path = <data_path>/ecmwf/clc/original_files/g250_clc12_v18_5a/g250_clc12_V18_5.tif
 population_density_map = <data_path>/jrc/ghsl/original_files/GHS_POP_GPW42015_GLOBE_R2015A_54009_1k_v1_0.tif
 
 [SPECIATION DATA]
@@ -218,7 +219,6 @@ livestock_hourly_profiles = <input_dir>/profiles/temporal/livestock/hourly_profi
 livestock_speciation_profiles = <input_dir>/profiles/speciation/livestock/speciation_profiles_base.csv
 
 [AGRICULTURAL]
-land_uses_path = <data_path>/ecmwf/clc/original_files/g250_clc12_v18_5a/g250_clc12_V18_5.tif
 land_use_by_nut_path =  <input_dir>/agriculture/land_use_ccaa.csv
 crop_by_nut_path = <input_dir>/agriculture/crops_ha_2017.csv
 crop_from_landuse_path = <input_dir>/agriculture/map_crops_landuse.csv
@@ -347,12 +347,12 @@ small_cities_monthly_profile = <input_dir>/profiles/temporal/traffic_area/small_
 small_cities_weekly_profile = <input_dir>/profiles/temporal/traffic_area/small_cities_weekly_profiles.csv
 small_cities_hourly_profile = <input_dir>/profiles/temporal/traffic_area/small_cities_hourly_profiles.csv
 
-[SOLVENT]
-solvent_pollutants = mnvoc
-solvent_description_path = <input_dir>/solvent/solvent_desctiption.csv
-solvent_yearly_emissions_by_nut2_path = <input_dir>/solvent/solvent_nut2.csv
-solvent_shapefile = <input_dir>/solvent/solvent_shapefile.shp
-solvent_monthly_profile = <input_dir>/profiles/temporal/solvent/solvent_monthly_profiles.csv
-solvent_weekly_profile = <input_dir>/profiles/temporal/solvent/solvent_weekly_profiles.csv
-solvent_hourly_profile = <input_dir>/profiles/temporal/solvent/solvent_hourly_profiles.csv
-solvent_speciation_profiles = <input_dir>/profiles/speciation/solvent/solvent_base.csv
+[SOLVENTS]
+solvents_pollutants = mnvoc
+solvents_proxies_path = <input_dir>/solvents/proxies_profiles.csv
+solvents_yearly_emissions_by_nut2_path = <input_dir>/solvents/miteco_solvent_emissions_nuts2_2015.csv
+solvents_point_sources_shapefile = <input_dir>/solvents/use_solvents_point_sources.shp
+solvents_monthly_profile = <input_dir>/profiles/temporal/solvents/monthly_profiles.csv
+solvents_weekly_profile = <input_dir>/profiles/temporal/solvents/weekly_profiles.csv
+solvents_hourly_profile = <input_dir>/profiles/temporal/solvents/hourly_profiles.csv
+solvents_speciation_profiles = <input_dir>/profiles/speciation/solvents/speciation_profiles_base.csv
diff --git a/hermesv3_bu/config/config.py b/hermesv3_bu/config/config.py
index 26d4ce2..d9633f9 100755
--- a/hermesv3_bu/config/config.py
+++ b/hermesv3_bu/config/config.py
@@ -146,6 +146,7 @@ class Config(ArgParser):
         p.add_argument('--crop_operations_processors', required=True, type=int)
         p.add_argument('--crop_fertilizers_processors', required=True, type=int)
         p.add_argument('--agricultural_machinery_processors', required=True, type=int)
+        p.add_argument('--solvents_processors', required=True, type=int)
 
         p.add_argument('--speciation_map', required=False, help='...')
 
@@ -347,6 +348,16 @@ class Config(ArgParser):
         p.add_argument('--small_cities_weekly_profile', required=False, help='...')
         p.add_argument('--small_cities_monthly_profile', required=False, help='...')
 
+        # ***** SOLVENTS SECTOR *****
+        p.add_argument('--solvents_pollutants', required=False, help='...')
+        p.add_argument('--solvents_proxies_path', required=False, help='...')
+        p.add_argument('--solvents_yearly_emissions_by_nut2_path', required=False, help='...')
+        p.add_argument('--solvents_point_sources_shapefile', required=False, help='...')
+        p.add_argument('--solvents_monthly_profile', required=False, help='...')
+        p.add_argument('--solvents_weekly_profile', required=False, help='...')
+        p.add_argument('--solvents_hourly_profile', required=False, help='...')
+        p.add_argument('--solvents_speciation_profiles', required=False, help='...')
+
         arguments = p.parse_args()
 
         for item in vars(arguments):
@@ -382,6 +393,7 @@ class Config(ArgParser):
                 comm.Barrier()
         self.create_dir(arguments.auxiliary_files_path)
 
+        # Booleans
         arguments.do_traffic = arguments.traffic_processors > 0
         arguments.do_traffic_area = arguments.traffic_area_processors > 0
         arguments.do_aviation = arguments.aviation_processors > 0
@@ -393,6 +405,7 @@ class Config(ArgParser):
         arguments.do_crop_operations = arguments.crop_operations_processors > 0
         arguments.do_crop_fertilizers = arguments.crop_fertilizers_processors > 0
         arguments.do_agricultural_machinery = arguments.agricultural_machinery_processors > 0
+        arguments.do_solvents = arguments.solvents_processors > 0
 
         # Aviation lists
         arguments.airport_list = self._parse_list(arguments.airport_list)
@@ -458,6 +471,9 @@ class Config(ArgParser):
         # Traffic area lists
         arguments.traffic_area_pollutants = self._parse_list(arguments.traffic_area_pollutants)
 
+        # Solvents lists
+        arguments.solvents_pollutants = self._parse_list(arguments.solvents_pollutants)
+
         return arguments
 
     @staticmethod
diff --git a/hermesv3_bu/sectors/sector_manager.py b/hermesv3_bu/sectors/sector_manager.py
index 9f4f5ae..b8d4368 100755
--- a/hermesv3_bu/sectors/sector_manager.py
+++ b/hermesv3_bu/sectors/sector_manager.py
@@ -5,7 +5,7 @@ from hermesv3_bu.logger.log import Log
 from hermesv3_bu.tools.checker import error_exit
 
 SECTOR_LIST = ['traffic', 'traffic_area', 'aviation', 'point_sources', 'recreational_boats', 'shipping_port',
-               'residential', 'livestock', 'crop_operations', 'crop_fertilizers', 'agricultural_machinery']
+               'residential', 'livestock', 'crop_operations', 'crop_fertilizers', 'agricultural_machinery', 'solvents']
 
 
 class SectorManager(object):
@@ -204,6 +204,17 @@ class SectorManager(object):
                     arguments.traffic_area_small_cities_ef_file, arguments.small_cities_monthly_profile,
                     arguments.small_cities_weekly_profile, arguments.small_cities_hourly_profile
                 )
+            elif sector == 'solvents' and comm_world.Get_rank() in sector_procs:
+                from hermesv3_bu.sectors.solvent_sector import SolventSector
+                self.sector = SolventSector(
+                    comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger,
+                    arguments.auxiliary_files_path, grid, clip, date_array, arguments.solvents_pollutants,
+                    grid.vertical_desctiption, arguments.speciation_map, arguments.molecular_weights,
+                    arguments.solvents_speciation_profiles, arguments.solvents_monthly_profile,
+                    arguments.solvents_weekly_profile, arguments.solvents_hourly_profile,
+                    arguments.solvents_proxies_path, arguments.solvents_yearly_emissions_by_nut2_path,
+                    arguments.solvents_point_sources_shapefile, arguments.population_density_map,
+                    arguments.land_uses_path)
 
             color += 1
 
diff --git a/hermesv3_bu/sectors/solvent_sector.py b/hermesv3_bu/sectors/solvent_sector.py
new file mode 100755
index 0000000..7251efa
--- /dev/null
+++ b/hermesv3_bu/sectors/solvent_sector.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import timeit
+import geopandas as gpd
+import pandas as pd
+import numpy as np
+from hermesv3_bu.sectors.sector import Sector
+from hermesv3_bu.io_server.io_shapefile import IoShapefile
+from hermesv3_bu.io_server.io_raster import IoRaster
+from hermesv3_bu.tools.checker import check_files, error_exit
+
+
+class SolventSector(Sector):
+    def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels,
+                 speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path,
+                 weekly_profile_path, hourly_profile_path, proxies_path, yearly_emissions_by_nut2_path,
+                 point_sources_shapefile_path, population_raster_path, land_uses_raster_path):
+
+        spent_time = timeit.default_timer()
+        logger.write_log('===== SOLVENTS SECTOR =====')
+        check_files([speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path,
+                     weekly_profile_path, hourly_profile_path, proxies_path, yearly_emissions_by_nut2_path,
+                     point_sources_shapefile_path, population_raster_path, land_uses_raster_path])
+
+        super(SolventSector, self).__init__(
+            comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels,
+            monthly_profile_path, weekly_profile_path, hourly_profile_path, speciation_map_path,
+            speciation_profiles_path, molecular_weights_path)
+
+        exit()
\ No newline at end of file
-- 
GitLab


From 59484fc9efb4d66bbc9e571532441315b040b467 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Fri, 30 Aug 2019 18:16:52 +0200
Subject: [PATCH 04/22] Working on Solvent sector

---
 conf/hermes.conf                           |  3 +-
 hermesv3_bu/config/config.py               |  3 +-
 hermesv3_bu/sectors/point_source_sector.py |  2 +-
 hermesv3_bu/sectors/sector_manager.py      |  8 +--
 hermesv3_bu/sectors/solvent_sector.py      | 65 +++++++++++++++++++++-
 5 files changed, 71 insertions(+), 10 deletions(-)

diff --git a/conf/hermes.conf b/conf/hermes.conf
index fc835e3..b44e9d4 100755
--- a/conf/hermes.conf
+++ b/conf/hermes.conf
@@ -148,7 +148,9 @@ solvents_processors = 1
 nuts3_shapefile = <input_dir>/shapefiles/nuts3/nuts3.shp
 nuts2_shapefile = <input_dir>/shapefiles/nuts2/nuts2.shp
 land_uses_path = <data_path>/ecmwf/clc/original_files/g250_clc12_v18_5a/g250_clc12_V18_5.tif
+land_use_by_nuts2_path =  <input_dir>/agriculture/land_use_ccaa.csv
 population_density_map = <data_path>/jrc/ghsl/original_files/GHS_POP_GPW42015_GLOBE_R2015A_54009_1k_v1_0.tif
+population_nuts2 = <input_dir>/population_by_nuts2.csv
 
 [SPECIATION DATA]
 speciation_map = <input_dir>/profiles/speciation/map_base.csv
@@ -219,7 +221,6 @@ livestock_hourly_profiles = <input_dir>/profiles/temporal/livestock/hourly_profi
 livestock_speciation_profiles = <input_dir>/profiles/speciation/livestock/speciation_profiles_base.csv
 
 [AGRICULTURAL]
-land_use_by_nut_path =  <input_dir>/agriculture/land_use_ccaa.csv
 crop_by_nut_path = <input_dir>/agriculture/crops_ha_2017.csv
 crop_from_landuse_path = <input_dir>/agriculture/map_crops_landuse.csv
 
diff --git a/hermesv3_bu/config/config.py b/hermesv3_bu/config/config.py
index d9633f9..12fe53f 100755
--- a/hermesv3_bu/config/config.py
+++ b/hermesv3_bu/config/config.py
@@ -153,6 +153,7 @@ class Config(ArgParser):
         # ===== SHAPEFILES =====
         p.add_argument('--nuts3_shapefile', required=False, type=str, default='True')
         p.add_argument('--nuts2_shapefile', required=False, type=str, default='True')
+        p.add_argument('--population_nuts2', required=False, type=str, default='True')
 
         p.add_argument('--clipping', required=False, type=str, default=None,
                        help='To clip the domain into an specific zone. ' +
@@ -221,7 +222,7 @@ class Config(ArgParser):
 
         # ***** AGRICULTURAL SECTOR*****
         p.add_argument('--land_uses_path', required=False, help='...')
-        p.add_argument('--land_use_by_nut_path', required=False, help='...')
+        p.add_argument('--land_use_by_nuts2_path', required=False, help='...')
         p.add_argument('--crop_by_nut_path', required=False, help='...')
         p.add_argument('--crop_from_landuse_path', required=False, help='...')
 
diff --git a/hermesv3_bu/sectors/point_source_sector.py b/hermesv3_bu/sectors/point_source_sector.py
index fd71d8b..584e79d 100755
--- a/hermesv3_bu/sectors/point_source_sector.py
+++ b/hermesv3_bu/sectors/point_source_sector.py
@@ -98,7 +98,7 @@ class PointSourceSector(Sector):
         spec_res = links_spec - spec
         if len(spec_res) > 0:
             error_exit("The following speciation profile IDs reported in the point sources shapefile do not appear " +
-                       "in the speciation profiles file. {0}".format(month_res))
+                       "in the speciation profiles file. {0}".format(spec_res))
 
     def read_catalog_csv(self, catalog_path, sector_list):
         """
diff --git a/hermesv3_bu/sectors/sector_manager.py b/hermesv3_bu/sectors/sector_manager.py
index b8d4368..55a1f61 100755
--- a/hermesv3_bu/sectors/sector_manager.py
+++ b/hermesv3_bu/sectors/sector_manager.py
@@ -86,7 +86,7 @@ class SectorManager(object):
                     arguments.crop_operations_monthly_profiles, arguments.crop_operations_weekly_profiles,
                     arguments.crop_operations_hourly_profiles, arguments.speciation_map,
                     arguments.crop_operations_speciation_profiles, arguments.molecular_weights,
-                    arguments.land_use_by_nut_path, arguments.crop_by_nut_path, arguments.crop_from_landuse_path)
+                    arguments.land_use_by_nuts2_path, arguments.crop_by_nut_path, arguments.crop_from_landuse_path)
 
             elif sector == 'crop_fertilizers' and comm_world.Get_rank() in sector_procs:
                 from hermesv3_bu.sectors.agricultural_crop_fertilizers_sector import AgriculturalCropFertilizersSector
@@ -99,7 +99,7 @@ class SectorManager(object):
                     arguments.crop_fertilizers_list, arguments.nuts2_shapefile, arguments.land_uses_path,
                     arguments.crop_fertilizers_hourly_profiles, arguments.speciation_map,
                     arguments.crop_fertilizers_speciation_profiles, arguments.molecular_weights,
-                    arguments.land_use_by_nut_path,  arguments.crop_by_nut_path, arguments.crop_from_landuse_path,
+                    arguments.land_use_by_nuts2_path,  arguments.crop_by_nut_path, arguments.crop_from_landuse_path,
                     arguments.cultivated_ratio, arguments.fertilizers_rate, arguments.crop_f_parameter,
                     arguments.crop_f_fertilizers, arguments.gridded_ph, arguments.gridded_cec,
                     arguments.fertilizers_denominator_yearly_factor_path, arguments.crop_calendar,
@@ -119,7 +119,7 @@ class SectorManager(object):
                     arguments.crop_machinery_monthly_profiles, arguments.crop_machinery_weekly_profiles,
                     arguments.crop_machinery_hourly_profiles,
                     arguments.speciation_map, arguments.crop_machinery_speciation_profiles, arguments.molecular_weights,
-                    arguments.land_use_by_nut_path, arguments.crop_by_nut_path, arguments.crop_from_landuse_path,
+                    arguments.land_use_by_nuts2_path, arguments.crop_by_nut_path, arguments.crop_from_landuse_path,
                     arguments.nuts3_shapefile, arguments.crop_machinery_deterioration_factor_path,
                     arguments.crop_machinery_load_factor_path, arguments.crop_machinery_vehicle_ratio_path,
                     arguments.crop_machinery_vehicle_units_path, arguments.crop_machinery_vehicle_workhours_path,
@@ -214,7 +214,7 @@ class SectorManager(object):
                     arguments.solvents_weekly_profile, arguments.solvents_hourly_profile,
                     arguments.solvents_proxies_path, arguments.solvents_yearly_emissions_by_nut2_path,
                     arguments.solvents_point_sources_shapefile, arguments.population_density_map,
-                    arguments.land_uses_path)
+                    arguments.population_nuts2, arguments.land_uses_path, arguments.land_use_by_nuts2_path)
 
             color += 1
 
diff --git a/hermesv3_bu/sectors/solvent_sector.py b/hermesv3_bu/sectors/solvent_sector.py
index 7251efa..e713a85 100755
--- a/hermesv3_bu/sectors/solvent_sector.py
+++ b/hermesv3_bu/sectors/solvent_sector.py
@@ -16,17 +16,76 @@ class SolventSector(Sector):
     def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels,
                  speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path,
                  weekly_profile_path, hourly_profile_path, proxies_path, yearly_emissions_by_nut2_path,
-                 point_sources_shapefile_path, population_raster_path, land_uses_raster_path):
+                 point_sources_shapefile_path, population_raster_path, population_nuts2_path, land_uses_raster_path,
+                 land_uses_nuts2_path):
 
         spent_time = timeit.default_timer()
         logger.write_log('===== SOLVENTS SECTOR =====')
         check_files([speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path,
                      weekly_profile_path, hourly_profile_path, proxies_path, yearly_emissions_by_nut2_path,
-                     point_sources_shapefile_path, population_raster_path, land_uses_raster_path])
+                     point_sources_shapefile_path, population_raster_path,
+                     # population_nuts2_path,
+                     land_uses_raster_path, land_uses_nuts2_path])
 
         super(SolventSector, self).__init__(
             comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels,
             monthly_profile_path, weekly_profile_path, hourly_profile_path, speciation_map_path,
             speciation_profiles_path, molecular_weights_path)
 
-        exit()
\ No newline at end of file
+        self.proxies = self.read_proxies(proxies_path)
+        self.check_profiles()
+        self.yearly_emissions = self.read_yearly_emissions(yearly_emissions_by_nut2_path)
+
+        # print(self.speciation_profile)
+        # print(self.monthly_profiles)
+        # print(self.weekly_profiles)
+        # print(self.hourly_profiles)
+        print(self.proxies.head())
+        print(self.proxies.columns.values)
+        exit()
+
+    def read_proxies(self, path):
+        proxies_df = pd.read_csv(path, dtype=str)
+
+        proxies_df.set_index('snap', inplace=True)
+        proxies_df = proxies_df.loc[proxies_df['CONS'] == '1']
+        proxies_df.drop(columns=['activity', 'CONS', 'gnfr'], inplace=True)
+
+        return proxies_df
+    
+    def check_profiles(self):
+        # Checking monthly profiles IDs
+        links_month = set(np.unique(self.proxies['P_month'].dropna().values))
+        month = set(self.monthly_profiles.index.values)
+        month_res = links_month - month
+        if len(month_res) > 0:
+            error_exit("The following monthly profile IDs reported in the solvent proxies CSV file do not appear " +
+                       "in the monthly profiles file. {0}".format(month_res))
+        # Checking weekly profiles IDs
+        links_week = set(np.unique(self.proxies['P_week'].dropna().values))
+        week = set(self.weekly_profiles.index.values)
+        week_res = links_week - week
+        if len(week_res) > 0:
+            error_exit("The following weekly profile IDs reported in the solvent proxies CSV file do not appear " +
+                       "in the weekly profiles file. {0}".format(week_res))
+        # Checking hourly profiles IDs
+        links_hour = set(np.unique(self.proxies['P_hour'].dropna().values))
+        hour = set(self.hourly_profiles.index.values)
+        hour_res = links_hour - hour
+        if len(hour_res) > 0:
+            error_exit("The following hourly profile IDs reported in the solvent proxies CSV file do not appear " +
+                       "in the hourly profiles file. {0}".format(hour_res))
+        # Checking speciation profiles IDs
+        links_spec = set(np.unique(self.proxies['P_spec'].dropna().values))
+        spec = set(self.speciation_profile.index.values)
+        spec_res = links_spec - spec
+        if len(spec_res) > 0:
+            error_exit("The following speciation profile IDs reported in the solvent proxies CSV file do not appear " +
+                       "in the speciation profiles file. {0}".format(spec_res))
+
+    def read_yearly_emissions(self, path):
+        year_emis = pd.read_csv(path, dtype={'nuts2_id': str, 'snap': str, 'nmvoc': np.float64})
+        year_emis.set_index(['nuts2_id', 'snap'], inplace=True)
+        year_emis.drop(columns=['gnfr_description', 'gnfr', 'snap_description', 'nuts2_na'], inplace=True)
+        return year_emis
+
-- 
GitLab


From ac42eb7007f0e46953a8f863a49bfacca757f472 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Tue, 3 Sep 2019 11:27:05 +0200
Subject: [PATCH 05/22] Starting Solvent sector

---
 conf/hermes.conf                       |  17 +--
 hermesv3_bu/io_server/io_raster.py     |  79 ++++++++++++-
 hermesv3_bu/io_server/io_server.py     |   6 +
 hermesv3_bu/io_server/io_shapefile.py  |  26 +++++
 hermesv3_bu/sectors/sector.py          |   9 +-
 hermesv3_bu/sectors/sector_manager.py  |   7 +-
 hermesv3_bu/sectors/solvent_sector.py  |  91 ---------------
 hermesv3_bu/sectors/solvents_sector.py | 155 +++++++++++++++++++++++++
 8 files changed, 283 insertions(+), 107 deletions(-)
 delete mode 100755 hermesv3_bu/sectors/solvent_sector.py
 create mode 100755 hermesv3_bu/sectors/solvents_sector.py

diff --git a/conf/hermes.conf b/conf/hermes.conf
index b44e9d4..fb01ca9 100755
--- a/conf/hermes.conf
+++ b/conf/hermes.conf
@@ -64,6 +64,14 @@ vertical_description = <input_dir>/profiles/vertical/MONARCH_Global_48layers_ver
     x_0 = 253151.59375
     y_0 = 43862.90625
 
+    # IP
+    #nx = 397
+    #ny = 397
+    #inc_x = 4000
+    #inc_y = 4000
+    #x_0 = -807847.688
+    #y_0 = -797137.125
+
     # EUROPA
     #nx = 478
     #ny = 398
@@ -72,13 +80,6 @@ vertical_description = <input_dir>/profiles/vertical/MONARCH_Global_48layers_ver
     #x_0 = -2131849.000point_sources
     #y_0 = -2073137.875
 
-    # IP
-    # nx = 397
-    # ny = 397
-    # inc_x = 4000
-    # inc_y = 4000
-    # x_0 = -807847.688
-    # y_0 = -797137.125
 
     # MAD
     #nx = 146
@@ -141,7 +142,7 @@ recreational_boats_processors = 0
 point_sources_processors = 0
 traffic_processors = 0
 traffic_area_processors = 0
-solvents_processors = 1
+solvents_processors = 4
 
 
 [SHAPEFILES]
diff --git a/hermesv3_bu/io_server/io_raster.py b/hermesv3_bu/io_server/io_raster.py
index 0f6f1a6..422e1fb 100755
--- a/hermesv3_bu/io_server/io_raster.py
+++ b/hermesv3_bu/io_server/io_raster.py
@@ -10,6 +10,7 @@ import numpy as np
 from shapely.geometry import Polygon
 
 from hermesv3_bu.io_server.io_server import IoServer
+from hermesv3_bu.io_server.io_shapefile import IoShapefile
 from hermesv3_bu.tools.checker import check_files, error_exit
 
 
@@ -202,7 +203,6 @@ class IoRaster(IoServer):
         :param out_path:
         :param write:
         :param crs:
-        :param rank:
         :param nodata:
         :return:
         """
@@ -313,3 +313,80 @@ class IoRaster(IoServer):
             gdf = gpd.read_file(out_path)
         gdf.set_index('CELL_ID', inplace=True)
         return gdf
+
+    def to_shapefile_parallel(self, raster_path, gather=False, bcast=False, crs=None, nodata=0):
+        spent_time = timeit.default_timer()
+        if self.comm.Get_rank() == 0:
+            ds = rasterio.open(raster_path)
+            grid_info = ds.transform
+            print('TIME 1:', timeit.default_timer() - spent_time)
+            # TODO remove when new version will be installed
+            if rasterio.__version__ == '0.36.0':
+                lons = np.arange(ds.width) * grid_info[1] + grid_info[0]
+                lats = np.arange(ds.height) * grid_info[5] + grid_info[3]
+            elif rasterio.__version__ == '1.0.21':
+                lons = np.arange(ds.width) * grid_info[0] + grid_info[2]
+                lats = np.arange(ds.height) * grid_info[4] + grid_info[5]
+            else:
+                lons = np.arange(ds.width) * grid_info[0] + grid_info[2]
+                lats = np.arange(ds.height) * grid_info[4] + grid_info[5]
+            print('TIME 2:', timeit.default_timer() - spent_time)
+            # 1D to 2D
+            c_lats = np.array([lats] * len(lons)).T.flatten()
+            c_lons = np.array([lons] * len(lats)).flatten()
+            print('TIME 3:', timeit.default_timer() - spent_time)
+            del lons, lats
+            if rasterio.__version__ == '0.36.0':
+                b_lons = self.create_bounds(c_lons, grid_info[1], number_vertices=4) + grid_info[1] / 2
+                b_lats = self.create_bounds(c_lats, grid_info[1], number_vertices=4, inverse=True) + grid_info[5] / 2
+            elif rasterio.__version__ == '1.0.21':
+                b_lons = self.create_bounds(c_lons, grid_info[0], number_vertices=4) + grid_info[0] / 2
+                b_lats = self.create_bounds(c_lats, grid_info[4], number_vertices=4, inverse=True) + grid_info[4] / 2
+            else:
+                b_lons = self.create_bounds(c_lons, grid_info[0], number_vertices=4) + grid_info[0] / 2
+                b_lats = self.create_bounds(c_lats, grid_info[4], number_vertices=4, inverse=True) + grid_info[4] / 2
+            print('TIME 4:', timeit.default_timer() - spent_time)
+            b_lats = b_lats.reshape((b_lats.shape[1], b_lats.shape[2]))
+            b_lons = b_lons.reshape((b_lons.shape[1], b_lons.shape[2]))
+            print('TIME 5:', timeit.default_timer() - spent_time)
+            gdf = gpd.GeoDataFrame(ds.read(1).flatten(), columns=['data'], index=range(b_lons.shape[0]), crs=ds.crs)
+            gdf['geometry'] = None
+            # nodata_i = gdf['data'] != nodata
+            # print(nodata_i)
+            # print(b_lats)
+            # gdf = gdf[nodata_i]
+            # b_lons = b_lons[nodata_i, :]
+            # b_lats = b_lats[nodata_i, :]
+            # print(gdf)
+        else:
+            gdf = None
+            b_lons = None
+            b_lats = None
+        self.comm.Barrier()
+        gdf = IoShapefile(self.comm).split_shapefile(gdf)
+
+        b_lons = IoShapefile(self.comm).split_shapefile(b_lons)
+        b_lats = IoShapefile(self.comm).split_shapefile(b_lats)
+        print('TIME 6:', timeit.default_timer() - spent_time)
+        i = 0
+        for j, df_aux in gdf.iterrows():
+            gdf.loc[j, 'geometry'] = Polygon([(b_lons[i, 0], b_lats[i, 0]),
+                                              (b_lons[i, 1], b_lats[i, 1]),
+                                              (b_lons[i, 2], b_lats[i, 2]),
+                                              (b_lons[i, 3], b_lats[i, 3]),
+                                              (b_lons[i, 0], b_lats[i, 0])])
+            i += 1
+
+        print('TIME 7:', timeit.default_timer() - spent_time)
+        gdf['CELL_ID'] = gdf.index
+        print('TIME 8:', timeit.default_timer() - spent_time)
+        gdf = gdf[gdf['data'] != nodata]
+        if crs is not None:
+            gdf = gdf.to_crs(crs)
+
+        if gather and not bcast:
+            gdf = IoShapefile(self.comm).gather_shapefile(gdf)
+        elif gather and bcast:
+            gdf = IoShapefile(self.comm).gather_bcast_shapefile(gdf)
+        return gdf
+
diff --git a/hermesv3_bu/io_server/io_server.py b/hermesv3_bu/io_server/io_server.py
index 694798f..46bd918 100755
--- a/hermesv3_bu/io_server/io_server.py
+++ b/hermesv3_bu/io_server/io_server.py
@@ -1,6 +1,12 @@
 #!/usr/bin/env python
 
+from mpi4py import MPI
+
 
 class IoServer(object):
+    """
+    :param comm: Communicator object
+    :type comm: MPI.Comm
+    """
     def __init__(self, comm):
         self.comm = comm
diff --git a/hermesv3_bu/io_server/io_shapefile.py b/hermesv3_bu/io_server/io_shapefile.py
index d4a5df1..a6d6f5a 100755
--- a/hermesv3_bu/io_server/io_shapefile.py
+++ b/hermesv3_bu/io_server/io_shapefile.py
@@ -100,6 +100,32 @@ class IoShapefile(IoServer):
 
         return data
 
+    def gather_bcast_shapefile(self, data, rank=0):
+
+        if self.comm.Get_size() == 1:
+            data = data
+        else:
+            data = self.comm.gather(data, root=rank)
+            if self.comm.Get_rank() == rank:
+                data = pd.concat(data)
+            else:
+                data = None
+            data = self.comm.bcast(data, root=rank)
+
+        return data
+
+    def gather_shapefile(self, data, rank=0):
+
+        if self.comm.Get_size() == 1:
+            data = data
+        else:
+            data = self.comm.gather(data, root=rank)
+            if self.comm.Get_rank() == rank:
+                data = pd.concat(data)
+            else:
+                data = None
+        return data
+
     def balance(self, data, rank=0):
 
         data = self.comm.gather(data, root=rank)
diff --git a/hermesv3_bu/sectors/sector.py b/hermesv3_bu/sectors/sector.py
index f49e379..f4cfc52 100755
--- a/hermesv3_bu/sectors/sector.py
+++ b/hermesv3_bu/sectors/sector.py
@@ -3,12 +3,13 @@
 import sys
 import os
 import timeit
-from hermesv3_bu.logger.log import Log
 import numpy as np
 import pandas as pd
 import geopandas as gpd
-from geopandas import GeoDataFrame
 from mpi4py import MPI
+
+from geopandas import GeoDataFrame
+from hermesv3_bu.logger.log import Log
 from hermesv3_bu.grids.grid import Grid
 
 
@@ -445,8 +446,8 @@ class Sector(object):
         spent_time = timeit.default_timer()
         nut_shapefile = gpd.read_file(nut_shapefile_path).to_crs(shapefile.crs)
         shapefile = gpd.sjoin(shapefile, nut_shapefile.loc[:, [nut_value, 'geometry']], how='left', op='intersects')
-
-        shapefile = shapefile[~shapefile.index.duplicated(keep='first')]
+        del nut_shapefile
+        # shapefile = shapefile[~shapefile.index.duplicated(keep='first')]
         shapefile.drop('index_right', axis=1, inplace=True)
 
         shapefile.rename(columns={nut_value: 'nut_code'}, inplace=True)
diff --git a/hermesv3_bu/sectors/sector_manager.py b/hermesv3_bu/sectors/sector_manager.py
index 55a1f61..5813b71 100755
--- a/hermesv3_bu/sectors/sector_manager.py
+++ b/hermesv3_bu/sectors/sector_manager.py
@@ -205,8 +205,8 @@ class SectorManager(object):
                     arguments.small_cities_weekly_profile, arguments.small_cities_hourly_profile
                 )
             elif sector == 'solvents' and comm_world.Get_rank() in sector_procs:
-                from hermesv3_bu.sectors.solvent_sector import SolventSector
-                self.sector = SolventSector(
+                from hermesv3_bu.sectors.solvents_sector import SolventsSector
+                self.sector = SolventsSector(
                     comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger,
                     arguments.auxiliary_files_path, grid, clip, date_array, arguments.solvents_pollutants,
                     grid.vertical_desctiption, arguments.speciation_map, arguments.molecular_weights,
@@ -214,7 +214,8 @@ class SectorManager(object):
                     arguments.solvents_weekly_profile, arguments.solvents_hourly_profile,
                     arguments.solvents_proxies_path, arguments.solvents_yearly_emissions_by_nut2_path,
                     arguments.solvents_point_sources_shapefile, arguments.population_density_map,
-                    arguments.population_nuts2, arguments.land_uses_path, arguments.land_use_by_nuts2_path)
+                    arguments.population_nuts2, arguments.land_uses_path, arguments.land_use_by_nuts2_path,
+                    arguments.nuts2_shapefile)
 
             color += 1
 
diff --git a/hermesv3_bu/sectors/solvent_sector.py b/hermesv3_bu/sectors/solvent_sector.py
deleted file mode 100755
index e713a85..0000000
--- a/hermesv3_bu/sectors/solvent_sector.py
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import os
-import timeit
-import geopandas as gpd
-import pandas as pd
-import numpy as np
-from hermesv3_bu.sectors.sector import Sector
-from hermesv3_bu.io_server.io_shapefile import IoShapefile
-from hermesv3_bu.io_server.io_raster import IoRaster
-from hermesv3_bu.tools.checker import check_files, error_exit
-
-
-class SolventSector(Sector):
-    def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels,
-                 speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path,
-                 weekly_profile_path, hourly_profile_path, proxies_path, yearly_emissions_by_nut2_path,
-                 point_sources_shapefile_path, population_raster_path, population_nuts2_path, land_uses_raster_path,
-                 land_uses_nuts2_path):
-
-        spent_time = timeit.default_timer()
-        logger.write_log('===== SOLVENTS SECTOR =====')
-        check_files([speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path,
-                     weekly_profile_path, hourly_profile_path, proxies_path, yearly_emissions_by_nut2_path,
-                     point_sources_shapefile_path, population_raster_path,
-                     # population_nuts2_path,
-                     land_uses_raster_path, land_uses_nuts2_path])
-
-        super(SolventSector, self).__init__(
-            comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels,
-            monthly_profile_path, weekly_profile_path, hourly_profile_path, speciation_map_path,
-            speciation_profiles_path, molecular_weights_path)
-
-        self.proxies = self.read_proxies(proxies_path)
-        self.check_profiles()
-        self.yearly_emissions = self.read_yearly_emissions(yearly_emissions_by_nut2_path)
-
-        # print(self.speciation_profile)
-        # print(self.monthly_profiles)
-        # print(self.weekly_profiles)
-        # print(self.hourly_profiles)
-        print(self.proxies.head())
-        print(self.proxies.columns.values)
-        exit()
-
-    def read_proxies(self, path):
-        proxies_df = pd.read_csv(path, dtype=str)
-
-        proxies_df.set_index('snap', inplace=True)
-        proxies_df = proxies_df.loc[proxies_df['CONS'] == '1']
-        proxies_df.drop(columns=['activity', 'CONS', 'gnfr'], inplace=True)
-
-        return proxies_df
-    
-    def check_profiles(self):
-        # Checking monthly profiles IDs
-        links_month = set(np.unique(self.proxies['P_month'].dropna().values))
-        month = set(self.monthly_profiles.index.values)
-        month_res = links_month - month
-        if len(month_res) > 0:
-            error_exit("The following monthly profile IDs reported in the solvent proxies CSV file do not appear " +
-                       "in the monthly profiles file. {0}".format(month_res))
-        # Checking weekly profiles IDs
-        links_week = set(np.unique(self.proxies['P_week'].dropna().values))
-        week = set(self.weekly_profiles.index.values)
-        week_res = links_week - week
-        if len(week_res) > 0:
-            error_exit("The following weekly profile IDs reported in the solvent proxies CSV file do not appear " +
-                       "in the weekly profiles file. {0}".format(week_res))
-        # Checking hourly profiles IDs
-        links_hour = set(np.unique(self.proxies['P_hour'].dropna().values))
-        hour = set(self.hourly_profiles.index.values)
-        hour_res = links_hour - hour
-        if len(hour_res) > 0:
-            error_exit("The following hourly profile IDs reported in the solvent proxies CSV file do not appear " +
-                       "in the hourly profiles file. {0}".format(hour_res))
-        # Checking speciation profiles IDs
-        links_spec = set(np.unique(self.proxies['P_spec'].dropna().values))
-        spec = set(self.speciation_profile.index.values)
-        spec_res = links_spec - spec
-        if len(spec_res) > 0:
-            error_exit("The following speciation profile IDs reported in the solvent proxies CSV file do not appear " +
-                       "in the speciation profiles file. {0}".format(spec_res))
-
-    def read_yearly_emissions(self, path):
-        year_emis = pd.read_csv(path, dtype={'nuts2_id': str, 'snap': str, 'nmvoc': np.float64})
-        year_emis.set_index(['nuts2_id', 'snap'], inplace=True)
-        year_emis.drop(columns=['gnfr_description', 'gnfr', 'snap_description', 'nuts2_na'], inplace=True)
-        return year_emis
-
diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
new file mode 100755
index 0000000..772fa3a
--- /dev/null
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import timeit
+import geopandas as gpd
+import pandas as pd
+import numpy as np
+from hermesv3_bu.sectors.sector import Sector
+from hermesv3_bu.io_server.io_shapefile import IoShapefile
+from hermesv3_bu.io_server.io_raster import IoRaster
+from hermesv3_bu.tools.checker import check_files, error_exit
+
+PROXY_NAMES = {'boat_building': 'boat',
+               'automobile_manufacturing': 'automobile',
+               'car_repairing': 'car_repair',
+               'dry_cleaning': 'gry_clean',
+               'rubber_processing': 'rubber',
+               'paints_manufacturing': 'paints',
+               'inks_manufacturing': 'ink',
+               'glues_manufacturing': 'glues',
+               'pharmaceutical_products_manufacturing': 'pharma',
+               'leather_taning': 'leather',
+               'printing': 'printing',
+               'automobile_manufacturing': 'automobile'
+               }
+
+
+class SolventsSector(Sector):
+    def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels,
+                 speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path,
+                 weekly_profile_path, hourly_profile_path, proxies_map_path, yearly_emissions_by_nut2_path,
+                 point_sources_shapefile_path, population_raster_path, population_nuts2_path, land_uses_raster_path,
+                 land_uses_nuts2_path, nut2_shapefile_path):
+
+        spent_time = timeit.default_timer()
+        logger.write_log('===== SOLVENTS SECTOR =====')
+        check_files([speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path,
+                     weekly_profile_path, hourly_profile_path, proxies_map_path, yearly_emissions_by_nut2_path,
+                     point_sources_shapefile_path, population_raster_path,
+                     # population_nuts2_path,
+                     land_uses_raster_path, land_uses_nuts2_path, nut2_shapefile_path])
+
+        super(SolventsSector, self).__init__(
+            comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels,
+            monthly_profile_path, weekly_profile_path, hourly_profile_path, speciation_map_path,
+            speciation_profiles_path, molecular_weights_path)
+
+        self.proxies_map = self.read_proxies(proxies_map_path)
+        self.check_profiles()
+
+        self.proxy = self.get_proxy_shapefile(population_raster_path, population_nuts2_path, nut2_shapefile_path)
+
+        self.yearly_emissions = self.read_yearly_emissions(yearly_emissions_by_nut2_path)
+        exit()
+        self.logger.write_time_log('SolventsSector', '__init__', timeit.default_timer() - spent_time)
+
+    def read_proxies(self, path):
+        spent_time = timeit.default_timer()
+        proxies_df = pd.read_csv(path, dtype=str)
+
+        proxies_df.set_index('snap', inplace=True)
+        proxies_df = proxies_df.loc[proxies_df['CONS'] == '1']
+        proxies_df.drop(columns=['activity', 'CONS', 'gnfr'], inplace=True)
+
+        proxies_df.loc[proxies_df['spatial_proxy'] == 'population', 'proxy_name'] = 'population'
+        proxies_df.loc[proxies_df['spatial_proxy'] == 'land_use', 'proxy_name'] = \
+            'lu_' + proxies_df['land_use_code'].replace(' ', '_', regex=True)
+        proxies_df.loc[proxies_df['spatial_proxy'] == 'shapefile', 'proxy_name'] = \
+            proxies_df['industry_code'].map(PROXY_NAMES)
+
+        self.logger.write_time_log('SolventsSector', 'read_proxies', timeit.default_timer() - spent_time)
+        return proxies_df
+    
+    def check_profiles(self):
+        spent_time = timeit.default_timer()
+        # Checking monthly profiles IDs
+        links_month = set(np.unique(self.proxies_map['P_month'].dropna().values))
+        month = set(self.monthly_profiles.index.values)
+        month_res = links_month - month
+        if len(month_res) > 0:
+            error_exit("The following monthly profile IDs reported in the solvent proxies CSV file do not appear " +
+                       "in the monthly profiles file. {0}".format(month_res))
+        # Checking weekly profiles IDs
+        links_week = set(np.unique(self.proxies_map['P_week'].dropna().values))
+        week = set(self.weekly_profiles.index.values)
+        week_res = links_week - week
+        if len(week_res) > 0:
+            error_exit("The following weekly profile IDs reported in the solvent proxies CSV file do not appear " +
+                       "in the weekly profiles file. {0}".format(week_res))
+        # Checking hourly profiles IDs
+        links_hour = set(np.unique(self.proxies_map['P_hour'].dropna().values))
+        hour = set(self.hourly_profiles.index.values)
+        hour_res = links_hour - hour
+        if len(hour_res) > 0:
+            error_exit("The following hourly profile IDs reported in the solvent proxies CSV file do not appear " +
+                       "in the hourly profiles file. {0}".format(hour_res))
+        # Checking speciation profiles IDs
+        links_spec = set(np.unique(self.proxies_map['P_spec'].dropna().values))
+        spec = set(self.speciation_profile.index.values)
+        spec_res = links_spec - spec
+        if len(spec_res) > 0:
+            error_exit("The following speciation profile IDs reported in the solvent proxies CSV file do not appear " +
+                       "in the speciation profiles file. {0}".format(spec_res))
+
+        self.logger.write_time_log('SolventsSector', 'check_profiles', timeit.default_timer() - spent_time)
+        return True
+
+    def read_yearly_emissions(self, path):
+        spent_time = timeit.default_timer()
+
+        year_emis = pd.read_csv(path, dtype={'nuts2_id': str, 'snap': str, 'nmvoc': np.float64})
+        year_emis.set_index(['nuts2_id', 'snap'], inplace=True)
+        year_emis.drop(columns=['gnfr_description', 'gnfr', 'snap_description', 'nuts2_na'], inplace=True)
+
+        self.logger.write_time_log('SolventsSector', 'read_yearly_emissions', timeit.default_timer() - spent_time)
+        return year_emis
+
+    def get_population_proxie(self, pop_raster_path, pop_by_nut2_path, nut2_shapefile_path):
+
+        if self.comm.Get_rank() == 0:
+            pop_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly(
+                pop_raster_path, self.clip.shapefile, os.path.join(self.auxiliary_dir, 'solvents', 'pop.tif'))
+        pop_shp = IoRaster(self.comm).to_shapefile_parallel(pop_raster_path, gather=True, bcast=False,
+                                                            crs={'init': 'epsg:4326'})
+        if self.comm.Get_rank() == 0:
+            pop_shp.rename(columns={'data': 'population'}, inplace=True)
+            pop_shp = self.add_nut_code(pop_shp, nut2_shapefile_path, nut_value='nuts2_id')
+            pop_shp.to_file('/gpfs/projects/bsc32/bsc32538/temp/pop_nut.shp')
+        pop_shp = IoShapefile(self.comm).split_shapefile(pop_shp)
+
+        print(pop_shp)
+        # if self.comm.Get_rank() == 0:
+        #     pop_shp.to_file('~/temp/pop{0}.shp'.format(self.comm.Get_size()))
+        #     print(pop_shp)
+        exit()
+
+    def get_proxy_shapefile(self, population_raster_path, population_nuts2_path, nut2_shapefile_path):
+
+        proxies_list = np.unique(self.proxies_map['proxy_name'])
+        proxies_list = np.unique(['population'])
+        proxy_shp_name = os.path.join(self.auxiliary_dir, 'solvents', 'proxy_distributions.shp')
+        if not os.path.exists(proxy_shp_name):
+            for proxy in proxies_list:
+                proxy_shp = self.grid.shapefile.copy()
+                if proxy == 'population':
+                    proxy_shp['population'] = self.get_population_proxie(population_raster_path, population_nuts2_path,
+                                                                         nut2_shapefile_path)
+        else:
+            pass
+        print(proxy_shp)
+        print(proxies_list)
+
+
+
-- 
GitLab


From 771f7f014c8bad5192c0022afc9785b931d1288f Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Tue, 3 Sep 2019 11:36:22 +0200
Subject: [PATCH 06/22] Starting Solvent sector

---
 hermesv3_bu/sectors/solvents_sector.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index 772fa3a..0fb557e 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -126,7 +126,6 @@ class SolventsSector(Sector):
         if self.comm.Get_rank() == 0:
             pop_shp.rename(columns={'data': 'population'}, inplace=True)
             pop_shp = self.add_nut_code(pop_shp, nut2_shapefile_path, nut_value='nuts2_id')
-            pop_shp.to_file('/gpfs/projects/bsc32/bsc32538/temp/pop_nut.shp')
         pop_shp = IoShapefile(self.comm).split_shapefile(pop_shp)
 
         print(pop_shp)
-- 
GitLab


From c642cae3dbf0c587e2bd058651f3636d1dae8fa1 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Tue, 3 Sep 2019 12:09:07 +0200
Subject: [PATCH 07/22] WIP

---
 hermesv3_bu/sectors/solvents_sector.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index 0fb557e..454ee37 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -149,6 +149,3 @@ class SolventsSector(Sector):
             pass
         print(proxy_shp)
         print(proxies_list)
-
-
-
-- 
GitLab


From 1e3bb556053811d09cde373925e406e7ffecaafe Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Tue, 3 Sep 2019 12:48:30 +0200
Subject: [PATCH 08/22] WIP

---
 conf/hermes.conf                       |  2 +-
 hermesv3_bu/io_server/io_raster.py     | 14 +++++---------
 hermesv3_bu/io_server/io_shapefile.py  |  8 ++++++++
 hermesv3_bu/sectors/solvents_sector.py | 18 +++++++++++++++++-
 4 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/conf/hermes.conf b/conf/hermes.conf
index 7c04c56..aec97cf 100755
--- a/conf/hermes.conf
+++ b/conf/hermes.conf
@@ -151,7 +151,7 @@ nuts2_shapefile = <input_dir>/shapefiles/nuts2/nuts2.shp
 land_uses_path = <data_path>/ecmwf/clc/original_files/g250_clc12_v18_5a/g250_clc12_V18_5.tif
 land_use_by_nuts2_path =  <input_dir>/agriculture/land_use_ccaa.csv
 population_density_map = <data_path>/jrc/ghsl/original_files/GHS_POP_GPW42015_GLOBE_R2015A_54009_1k_v1_0.tif
-population_nuts2 = <input_dir>/population_by_nuts2.csv
+population_nuts2 = <input_dir>/shapefiles/nuts2/pop_by_nut2.csv
 
 [SPECIATION DATA]
 speciation_map = <input_dir>/profiles/speciation/map_base.csv
diff --git a/hermesv3_bu/io_server/io_raster.py b/hermesv3_bu/io_server/io_raster.py
index 422e1fb..b331cdf 100755
--- a/hermesv3_bu/io_server/io_raster.py
+++ b/hermesv3_bu/io_server/io_raster.py
@@ -319,7 +319,7 @@ class IoRaster(IoServer):
         if self.comm.Get_rank() == 0:
             ds = rasterio.open(raster_path)
             grid_info = ds.transform
-            print('TIME 1:', timeit.default_timer() - spent_time)
+
             # TODO remove when new version will be installed
             if rasterio.__version__ == '0.36.0':
                 lons = np.arange(ds.width) * grid_info[1] + grid_info[0]
@@ -330,11 +330,10 @@ class IoRaster(IoServer):
             else:
                 lons = np.arange(ds.width) * grid_info[0] + grid_info[2]
                 lats = np.arange(ds.height) * grid_info[4] + grid_info[5]
-            print('TIME 2:', timeit.default_timer() - spent_time)
+
             # 1D to 2D
             c_lats = np.array([lats] * len(lons)).T.flatten()
             c_lons = np.array([lons] * len(lats)).flatten()
-            print('TIME 3:', timeit.default_timer() - spent_time)
             del lons, lats
             if rasterio.__version__ == '0.36.0':
                 b_lons = self.create_bounds(c_lons, grid_info[1], number_vertices=4) + grid_info[1] / 2
@@ -345,10 +344,10 @@ class IoRaster(IoServer):
             else:
                 b_lons = self.create_bounds(c_lons, grid_info[0], number_vertices=4) + grid_info[0] / 2
                 b_lats = self.create_bounds(c_lats, grid_info[4], number_vertices=4, inverse=True) + grid_info[4] / 2
-            print('TIME 4:', timeit.default_timer() - spent_time)
+
             b_lats = b_lats.reshape((b_lats.shape[1], b_lats.shape[2]))
             b_lons = b_lons.reshape((b_lons.shape[1], b_lons.shape[2]))
-            print('TIME 5:', timeit.default_timer() - spent_time)
+
             gdf = gpd.GeoDataFrame(ds.read(1).flatten(), columns=['data'], index=range(b_lons.shape[0]), crs=ds.crs)
             gdf['geometry'] = None
             # nodata_i = gdf['data'] != nodata
@@ -367,7 +366,7 @@ class IoRaster(IoServer):
 
         b_lons = IoShapefile(self.comm).split_shapefile(b_lons)
         b_lats = IoShapefile(self.comm).split_shapefile(b_lats)
-        print('TIME 6:', timeit.default_timer() - spent_time)
+
         i = 0
         for j, df_aux in gdf.iterrows():
             gdf.loc[j, 'geometry'] = Polygon([(b_lons[i, 0], b_lats[i, 0]),
@@ -377,9 +376,7 @@ class IoRaster(IoServer):
                                               (b_lons[i, 0], b_lats[i, 0])])
             i += 1
 
-        print('TIME 7:', timeit.default_timer() - spent_time)
         gdf['CELL_ID'] = gdf.index
-        print('TIME 8:', timeit.default_timer() - spent_time)
         gdf = gdf[gdf['data'] != nodata]
         if crs is not None:
             gdf = gdf.to_crs(crs)
@@ -389,4 +386,3 @@ class IoRaster(IoServer):
         elif gather and bcast:
             gdf = IoShapefile(self.comm).gather_bcast_shapefile(gdf)
         return gdf
-
diff --git a/hermesv3_bu/io_server/io_shapefile.py b/hermesv3_bu/io_server/io_shapefile.py
index a6d6f5a..59ece64 100755
--- a/hermesv3_bu/io_server/io_shapefile.py
+++ b/hermesv3_bu/io_server/io_shapefile.py
@@ -9,6 +9,7 @@ import pandas as pd
 import geopandas as gpd
 from mpi4py import MPI
 
+from geopandas import GeoDataFrame
 from hermesv3_bu.io_server.io_server import IoServer
 from hermesv3_bu.tools.checker import check_files
 
@@ -88,6 +89,13 @@ class IoShapefile(IoServer):
         return data
 
     def split_shapefile(self, data, rank=0):
+        """
+
+        :param data:
+        :param rank:
+        :return: Splitted Shapefile
+        :rtype: GeoDataFrame
+        """
 
         if self.comm.Get_size() == 1:
             data = data
diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index 454ee37..e08fb15 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -71,7 +71,7 @@ class SolventsSector(Sector):
 
         self.logger.write_time_log('SolventsSector', 'read_proxies', timeit.default_timer() - spent_time)
         return proxies_df
-    
+
     def check_profiles(self):
         spent_time = timeit.default_timer()
         # Checking monthly profiles IDs
@@ -116,18 +116,34 @@ class SolventsSector(Sector):
         self.logger.write_time_log('SolventsSector', 'read_yearly_emissions', timeit.default_timer() - spent_time)
         return year_emis
 
+    def get_pop_by_nut2(self, path):
+        pop_by_nut2 = pd.read_csv(path)
+        pop_by_nut2 = pop_by_nut2.to_dict()['pop']
+
+        return pop_by_nut2
+
+
     def get_population_proxie(self, pop_raster_path, pop_by_nut2_path, nut2_shapefile_path):
 
+        # 1st Clip the raster
         if self.comm.Get_rank() == 0:
             pop_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly(
                 pop_raster_path, self.clip.shapefile, os.path.join(self.auxiliary_dir, 'solvents', 'pop.tif'))
+
+        # 2nd Raster to shapefile
         pop_shp = IoRaster(self.comm).to_shapefile_parallel(pop_raster_path, gather=True, bcast=False,
                                                             crs={'init': 'epsg:4326'})
+        # 3rd Add NUT code
         if self.comm.Get_rank() == 0:
             pop_shp.rename(columns={'data': 'population'}, inplace=True)
             pop_shp = self.add_nut_code(pop_shp, nut2_shapefile_path, nut_value='nuts2_id')
+            pop_shp = pop_shp[pop_shp['nut_code'] != -999]
         pop_shp = IoShapefile(self.comm).split_shapefile(pop_shp)
 
+        # 4th Calculate pop percent
+        pop_by_nut2 = self.get_pop_by_nut2(pop_by_nut2_path)
+        pop_shp['pop_percent'] = pop_shp.groupby('nut_code').apply(
+            lambda x: x['population'] / pop_by_nut2[x.name])
         print(pop_shp)
         # if self.comm.Get_rank() == 0:
         #     pop_shp.to_file('~/temp/pop{0}.shp'.format(self.comm.Get_size()))
-- 
GitLab


From 2f9aadc145e2915db86c81b4104df3d585ca4f0f Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Tue, 3 Sep 2019 17:14:48 +0200
Subject: [PATCH 09/22] population proxy done

---
 conf/hermes.conf                       | 14 ++++----
 hermesv3_bu/sectors/sector.py          |  2 +-
 hermesv3_bu/sectors/solvents_sector.py | 46 ++++++++++++++++++--------
 3 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/conf/hermes.conf b/conf/hermes.conf
index aec97cf..550f74f 100755
--- a/conf/hermes.conf
+++ b/conf/hermes.conf
@@ -10,7 +10,7 @@ start_date = 2016/11/29 00:00:00
 # end_date = 2010/01/01 00:00:00
 output_timestep_num = 24
 auxiliary_files_path = /scratch/Earth/HERMESv3_BU_aux/test/<domain_type>_<resolution>
-erase_auxiliary_files = 0
+erase_auxiliary_files = 1
 
 
 [DOMAIN]
@@ -55,10 +55,10 @@ vertical_description = <input_dir>/profiles/vertical/MONARCH_Global_48layers_ver
     #y_0 = 43862.90625
 
     # CATALUNYA test
-    nx = 28
-    ny = 30
-    #nx = 4
-    #ny = 4
+    #nx = 28
+    #ny = 30
+    nx = 4
+    ny = 4
     inc_x = 10000
     inc_y = 10000
     x_0 = 253151.59375
@@ -131,7 +131,7 @@ writing_processors = 1
 # traffic_processors = 256
 # traffic_area_processors = 1
 
-aviation_processors = 1
+aviation_processors = 0
 shipping_port_processors = 0
 livestock_processors = 0
 crop_operations_processors = 0
@@ -142,7 +142,7 @@ recreational_boats_processors = 0
 point_sources_processors = 0
 traffic_processors = 0
 traffic_area_processors = 0
-solvents_processors = 4
+solvents_processors = 1
 
 
 [SHAPEFILES]
diff --git a/hermesv3_bu/sectors/sector.py b/hermesv3_bu/sectors/sector.py
index f4cfc52..1a94e23 100755
--- a/hermesv3_bu/sectors/sector.py
+++ b/hermesv3_bu/sectors/sector.py
@@ -447,7 +447,7 @@ class Sector(object):
         nut_shapefile = gpd.read_file(nut_shapefile_path).to_crs(shapefile.crs)
         shapefile = gpd.sjoin(shapefile, nut_shapefile.loc[:, [nut_value, 'geometry']], how='left', op='intersects')
         del nut_shapefile
-        # shapefile = shapefile[~shapefile.index.duplicated(keep='first')]
+        shapefile = shapefile[~shapefile.index.duplicated(keep='first')]
         shapefile.drop('index_right', axis=1, inplace=True)
 
         shapefile.rename(columns={nut_value: 'nut_code'}, inplace=True)
diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index e08fb15..ef8bb3d 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -22,7 +22,6 @@ PROXY_NAMES = {'boat_building': 'boat',
                'pharmaceutical_products_manufacturing': 'pharma',
                'leather_taning': 'leather',
                'printing': 'printing',
-               'automobile_manufacturing': 'automobile'
                }
 
 
@@ -118,12 +117,13 @@ class SolventsSector(Sector):
 
     def get_pop_by_nut2(self, path):
         pop_by_nut2 = pd.read_csv(path)
+        pop_by_nut2.set_index('nuts2_id', inplace=True)
         pop_by_nut2 = pop_by_nut2.to_dict()['pop']
 
         return pop_by_nut2
 
-
     def get_population_proxie(self, pop_raster_path, pop_by_nut2_path, nut2_shapefile_path):
+        spent_time = timeit.default_timer()
 
         # 1st Clip the raster
         if self.comm.Get_rank() == 0:
@@ -133,22 +133,33 @@ class SolventsSector(Sector):
         # 2nd Raster to shapefile
         pop_shp = IoRaster(self.comm).to_shapefile_parallel(pop_raster_path, gather=True, bcast=False,
                                                             crs={'init': 'epsg:4326'})
+
         # 3rd Add NUT code
         if self.comm.Get_rank() == 0:
+            pop_shp.drop(columns='CELL_ID', inplace=True)
             pop_shp.rename(columns={'data': 'population'}, inplace=True)
             pop_shp = self.add_nut_code(pop_shp, nut2_shapefile_path, nut_value='nuts2_id')
             pop_shp = pop_shp[pop_shp['nut_code'] != -999]
         pop_shp = IoShapefile(self.comm).split_shapefile(pop_shp)
-
         # 4th Calculate pop percent
         pop_by_nut2 = self.get_pop_by_nut2(pop_by_nut2_path)
-        pop_shp['pop_percent'] = pop_shp.groupby('nut_code').apply(
-            lambda x: x['population'] / pop_by_nut2[x.name])
-        print(pop_shp)
-        # if self.comm.Get_rank() == 0:
-        #     pop_shp.to_file('~/temp/pop{0}.shp'.format(self.comm.Get_size()))
-        #     print(pop_shp)
-        exit()
+        pop_shp['tot_pop'] = pop_shp['nut_code'].map(pop_by_nut2)
+        pop_shp['pop_percent'] = pop_shp['population'] / pop_shp['tot_pop']
+        pop_shp.drop(columns=['tot_pop', 'population'], inplace=True)
+
+        # 5th Calculate percent by dest_cell
+        pop_shp.to_crs(self.grid.shapefile.crs, inplace=True)
+        pop_shp['src_inter_fraction'] = pop_shp.geometry.area
+        pop_shp = self.spatial_overlays(pop_shp.reset_index(), self.grid.shapefile.reset_index())
+        pop_shp.drop(columns=['idx1', 'idx2', 'index'], inplace=True)
+        pop_shp['src_inter_fraction'] = pop_shp.geometry.area / pop_shp['src_inter_fraction']
+        pop_shp['pop_percent'] = pop_shp['pop_percent'] * pop_shp['src_inter_fraction']
+        pop_shp.drop(columns=['src_inter_fraction'], inplace=True)
+
+        popu_dist = pop_shp.groupby(['FID', 'nut_code']).sum()
+        popu_dist.rename({'pop_percent': 'population'}, inplace=True)
+        self.logger.write_time_log('SolventsSector', 'get_population_proxie', timeit.default_timer() - spent_time)
+        return popu_dist
 
     def get_proxy_shapefile(self, population_raster_path, population_nuts2_path, nut2_shapefile_path):
 
@@ -157,11 +168,18 @@ class SolventsSector(Sector):
         proxy_shp_name = os.path.join(self.auxiliary_dir, 'solvents', 'proxy_distributions.shp')
         if not os.path.exists(proxy_shp_name):
             for proxy in proxies_list:
-                proxy_shp = self.grid.shapefile.copy()
+                proxies_list = []
                 if proxy == 'population':
-                    proxy_shp['population'] = self.get_population_proxie(population_raster_path, population_nuts2_path,
-                                                                         nut2_shapefile_path)
+                    pop_proxy = self.get_population_proxie(population_raster_path, population_nuts2_path,
+                                                           nut2_shapefile_path)
+                    pop_proxy = self.comm.gather(pop_proxy.reset_index(), root=0)
+                    if self.comm.Get_rank() == 0:
+                        pop_proxy = pd.concat(pop_proxy)
+                        pop_proxy = pop_proxy.groupby(['FID', 'nut_code']).sum()
+                        pop_proxy.to_csv('/gpfs/projects/bsc32/bsc32538/temp/population_proxy.csv')
+                    self.comm.Barrier()
         else:
             pass
-        print(proxy_shp)
+        exit()
+        # print(proxy_shp)
         print(proxies_list)
-- 
GitLab


From 8ea9af36e01bbdc1d225d82b874e1e2be9f1d53b Mon Sep 17 00:00:00 2001
From: carles <carles.tena@bsc.es>
Date: Wed, 4 Sep 2019 13:00:11 +0200
Subject: [PATCH 10/22] Working on land use proxies

---
 hermesv3_bu/sectors/solvents_sector.py | 106 +++++++++++++++++++------
 1 file changed, 83 insertions(+), 23 deletions(-)

diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index ef8bb3d..130698a 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -48,7 +48,8 @@ class SolventsSector(Sector):
         self.proxies_map = self.read_proxies(proxies_map_path)
         self.check_profiles()
 
-        self.proxy = self.get_proxy_shapefile(population_raster_path, population_nuts2_path, nut2_shapefile_path)
+        self.proxy = self.get_proxy_shapefile(population_raster_path, population_nuts2_path, land_uses_raster_path,
+                                              land_uses_nuts2_path, nut2_shapefile_path)
 
         self.yearly_emissions = self.read_yearly_emissions(yearly_emissions_by_nut2_path)
         exit()
@@ -115,39 +116,59 @@ class SolventsSector(Sector):
         self.logger.write_time_log('SolventsSector', 'read_yearly_emissions', timeit.default_timer() - spent_time)
         return year_emis
 
-    def get_pop_by_nut2(self, path):
+    def get_population_by_nut2(self, path):
+        spent_time = timeit.default_timer()
+
         pop_by_nut2 = pd.read_csv(path)
         pop_by_nut2.set_index('nuts2_id', inplace=True)
         pop_by_nut2 = pop_by_nut2.to_dict()['pop']
 
+        self.logger.write_time_log('SolventsSector', 'get_pop_by_nut2', timeit.default_timer() - spent_time)
         return pop_by_nut2
 
-    def get_population_proxie(self, pop_raster_path, pop_by_nut2_path, nut2_shapefile_path):
+    def get_land_use_by_nut2(self, path, land_uses, nut_codes):
+        spent_time = timeit.default_timer()
+
+        land_use_by_nut2 = pd.read_csv(path)
+        land_use_by_nut2 = land_use_by_nut2[land_use_by_nut2['nuts2_id'].isin(nut_codes)]
+        land_use_by_nut2 = land_use_by_nut2[land_use_by_nut2['land_use'].isin(land_uses)]
+        land_use_by_nut2.set_index(['nuts2_id', 'land_use'], inplace=True)
+
+        self.logger.write_time_log('SolventsSector', 'get_land_use_by_nut2', timeit.default_timer() - spent_time)
+        return land_use_by_nut2
+
+    def get_population_proxy(self, pop_raster_path, pop_by_nut2_path, nut2_shapefile_path):
         spent_time = timeit.default_timer()
 
         # 1st Clip the raster
+        self.logger.write_log("\t\tCreating clipped population raster", message_level=3)
         if self.comm.Get_rank() == 0:
             pop_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly(
                 pop_raster_path, self.clip.shapefile, os.path.join(self.auxiliary_dir, 'solvents', 'pop.tif'))
 
         # 2nd Raster to shapefile
-        pop_shp = IoRaster(self.comm).to_shapefile_parallel(pop_raster_path, gather=True, bcast=False,
-                                                            crs={'init': 'epsg:4326'})
+        self.logger.write_log("\t\tRaster to shapefile", message_level=3)
+        pop_shp = IoRaster(self.comm).to_shapefile_parallel(
+            pop_raster_path, gather=True, bcast=False, crs={'init': 'epsg:4326'})
 
         # 3rd Add NUT code
+        self.logger.write_log("\t\tAdding nut codes to the shapefile", message_level=3)
         if self.comm.Get_rank() == 0:
             pop_shp.drop(columns='CELL_ID', inplace=True)
             pop_shp.rename(columns={'data': 'population'}, inplace=True)
             pop_shp = self.add_nut_code(pop_shp, nut2_shapefile_path, nut_value='nuts2_id')
             pop_shp = pop_shp[pop_shp['nut_code'] != -999]
         pop_shp = IoShapefile(self.comm).split_shapefile(pop_shp)
-        # 4th Calculate pop percent
-        pop_by_nut2 = self.get_pop_by_nut2(pop_by_nut2_path)
+
+        # 4th Calculate population percent
+        self.logger.write_log("\t\tCalculating population percentage on source resolution", message_level=3)
+        pop_by_nut2 = self.get_population_by_nut2(pop_by_nut2_path)
         pop_shp['tot_pop'] = pop_shp['nut_code'].map(pop_by_nut2)
         pop_shp['pop_percent'] = pop_shp['population'] / pop_shp['tot_pop']
         pop_shp.drop(columns=['tot_pop', 'population'], inplace=True)
 
         # 5th Calculate percent by dest_cell
+        self.logger.write_log("\t\tCalculating population percentage on destiny resolution", message_level=3)
         pop_shp.to_crs(self.grid.shapefile.crs, inplace=True)
         pop_shp['src_inter_fraction'] = pop_shp.geometry.area
         pop_shp = self.spatial_overlays(pop_shp.reset_index(), self.grid.shapefile.reset_index())
@@ -158,28 +179,67 @@ class SolventsSector(Sector):
 
         popu_dist = pop_shp.groupby(['FID', 'nut_code']).sum()
         popu_dist.rename({'pop_percent': 'population'}, inplace=True)
+
         self.logger.write_time_log('SolventsSector', 'get_population_proxie', timeit.default_timer() - spent_time)
         return popu_dist
 
-    def get_proxy_shapefile(self, population_raster_path, population_nuts2_path, nut2_shapefile_path):
+    def get_land_use_proxy(self, land_use_raster, land_use_by_nut2_path, land_uses, nut2_shapefile_path):
+        spent_time = timeit.default_timer()
+        # 1st Clip the raster
+        self.logger.write_log("\t\tCreating clipped land use raster", message_level=3)
+        lu_raster_path = os.path.join(self.auxiliary_dir, 'solvents', 'lu_{0}.tif'.format('_'.join(land_uses)))
+        if self.comm.Get_rank() == 0:
+            if not os.path.exists(lu_raster_path):
+                lu_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly(
+                    land_use_raster, self.clip.shapefile, lu_raster_path)
+
+        # 2nd Raster to shapefile
+        self.logger.write_log("\t\tRaster to shapefile", message_level=3)
+        land_use_shp = IoRaster(self.comm).to_shapefile_parallel(
+            lu_raster_path, gather=False, bcast=False, crs={'init': 'epsg:4326'})
+        print(land_use_shp)
+        sys.stdout.flush()
+        # 3rd Add NUT code
+        self.logger.write_log("\t\tAdding nut codes to the shapefile", message_level=3)
+        # if self.comm.Get_rank() == 0:
+        land_use_shp.drop(columns='CELL_ID', inplace=True)
+        land_use_shp.rename(columns={'data': 'population'}, inplace=True)
+        land_use_shp = self.add_nut_code(land_use_shp, nut2_shapefile_path, nut_value='nuts2_id')
+        land_use_shp = land_use_shp[land_use_shp['nut_code'] != -999]
+        # land_use_shp = IoShapefile(self.comm).split_shapefile(land_use_shp)
+
+        # 4th Calculate land_use percent
+        self.logger.write_log("\t\tCalculating land use percentage on source resolution", message_level=3)
+        land_use_by_nut2 = self.get_land_use_by_nut2(
+            land_use_by_nut2_path, land_uses, np.unique(land_use_shp['nut_code']))
+
+        # 5th Calculate percent by dest_cell
+        self.logger.write_log("\t\tCalculating land use percentage on destiny resolution", message_level=3)
 
-        proxies_list = np.unique(self.proxies_map['proxy_name'])
-        proxies_list = np.unique(['population'])
+        self.logger.write_time_log('SolventsSector', 'get_land_use_proxy', timeit.default_timer() - spent_time)
+        return True
+
+    def get_proxy_shapefile(self, population_raster_path, population_nuts2_path, land_uses_raster_path,
+                            land_uses_nuts2_path, nut2_shapefile_path):
+        spent_time = timeit.default_timer()
+        self.logger.write_log("Getting proxies shapefile")
+        # proxy_names_list = np.unique(self.proxies_map['proxy_name'])
+        proxy_names_list = np.unique(['lu_3'])
         proxy_shp_name = os.path.join(self.auxiliary_dir, 'solvents', 'proxy_distributions.shp')
         if not os.path.exists(proxy_shp_name):
-            for proxy in proxies_list:
+            for proxy_name in proxy_names_list:
+                self.logger.write_log("\tGetting proxy for {0}".format(proxy_name), message_level=2)
                 proxies_list = []
-                if proxy == 'population':
-                    pop_proxy = self.get_population_proxie(population_raster_path, population_nuts2_path,
-                                                           nut2_shapefile_path)
-                    pop_proxy = self.comm.gather(pop_proxy.reset_index(), root=0)
-                    if self.comm.Get_rank() == 0:
-                        pop_proxy = pd.concat(pop_proxy)
-                        pop_proxy = pop_proxy.groupby(['FID', 'nut_code']).sum()
-                        pop_proxy.to_csv('/gpfs/projects/bsc32/bsc32538/temp/population_proxy.csv')
-                    self.comm.Barrier()
+                if proxy_name == 'population':
+                    pop_proxy = self.get_population_proxy(population_raster_path, population_nuts2_path,
+                                                          nut2_shapefile_path)
+                    proxies_list.append(pop_proxy)
+                if proxy_name[:3] == 'lu_':
+                    land_uses = proxy_name.split('_')
+                    land_use_proxy = self.get_land_use_proxy(land_uses_raster_path, land_uses_nuts2_path, land_uses,
+                                                             nut2_shapefile_path)
         else:
             pass
-        exit()
-        # print(proxy_shp)
-        print(proxies_list)
+
+        self.logger.write_time_log('SolventsSector', 'get_proxy_shapefile', timeit.default_timer() - spent_time)
+        return True
-- 
GitLab


From 9e5177ea98a73acae24e48bdff78a7c3e07bc4c6 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Wed, 4 Sep 2019 14:27:35 +0200
Subject: [PATCH 11/22] Solvents: population add nut code in parallel

---
 hermesv3_bu/sectors/solvents_sector.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index 130698a..8a0bf51 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -149,16 +149,16 @@ class SolventsSector(Sector):
         # 2nd Raster to shapefile
         self.logger.write_log("\t\tRaster to shapefile", message_level=3)
         pop_shp = IoRaster(self.comm).to_shapefile_parallel(
-            pop_raster_path, gather=True, bcast=False, crs={'init': 'epsg:4326'})
+            pop_raster_path, gather=False, bcast=False, crs={'init': 'epsg:4326'})
 
         # 3rd Add NUT code
         self.logger.write_log("\t\tAdding nut codes to the shapefile", message_level=3)
-        if self.comm.Get_rank() == 0:
-            pop_shp.drop(columns='CELL_ID', inplace=True)
-            pop_shp.rename(columns={'data': 'population'}, inplace=True)
-            pop_shp = self.add_nut_code(pop_shp, nut2_shapefile_path, nut_value='nuts2_id')
-            pop_shp = pop_shp[pop_shp['nut_code'] != -999]
-        pop_shp = IoShapefile(self.comm).split_shapefile(pop_shp)
+        # if self.comm.Get_rank() == 0:
+        pop_shp.drop(columns='CELL_ID', inplace=True)
+        pop_shp.rename(columns={'data': 'population'}, inplace=True)
+        pop_shp = self.add_nut_code(pop_shp, nut2_shapefile_path, nut_value='nuts2_id')
+        pop_shp = pop_shp[pop_shp['nut_code'] != -999]
+        # pop_shp = IoShapefile(self.comm).split_shapefile(pop_shp)
 
         # 4th Calculate population percent
         self.logger.write_log("\t\tCalculating population percentage on source resolution", message_level=3)
@@ -197,8 +197,7 @@ class SolventsSector(Sector):
         self.logger.write_log("\t\tRaster to shapefile", message_level=3)
         land_use_shp = IoRaster(self.comm).to_shapefile_parallel(
             lu_raster_path, gather=False, bcast=False, crs={'init': 'epsg:4326'})
-        print(land_use_shp)
-        sys.stdout.flush()
+
         # 3rd Add NUT code
         self.logger.write_log("\t\tAdding nut codes to the shapefile", message_level=3)
         # if self.comm.Get_rank() == 0:
@@ -212,7 +211,8 @@ class SolventsSector(Sector):
         self.logger.write_log("\t\tCalculating land use percentage on source resolution", message_level=3)
         land_use_by_nut2 = self.get_land_use_by_nut2(
             land_use_by_nut2_path, land_uses, np.unique(land_use_shp['nut_code']))
-
+        print(land_use_by_nut2)
+        exit()
         # 5th Calculate percent by dest_cell
         self.logger.write_log("\t\tCalculating land use percentage on destiny resolution", message_level=3)
 
-- 
GitLab


From 69dba06875213cb7cc9492c79e7cedd6b0321c3e Mon Sep 17 00:00:00 2001
From: carles <carles.tena@bsc.es>
Date: Wed, 4 Sep 2019 16:23:20 +0200
Subject: [PATCH 12/22] WIP

---
 hermesv3_bu/sectors/solvents_sector.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index 8a0bf51..f3d6e17 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -195,8 +195,7 @@ class SolventsSector(Sector):
 
         # 2nd Raster to shapefile
         self.logger.write_log("\t\tRaster to shapefile", message_level=3)
-        land_use_shp = IoRaster(self.comm).to_shapefile_parallel(
-            lu_raster_path, gather=False, bcast=False, crs={'init': 'epsg:4326'})
+        land_use_shp = IoRaster(self.comm).to_shapefile_parallel(lu_raster_path, gather=False, bcast=False)
 
         # 3rd Add NUT code
         self.logger.write_log("\t\tAdding nut codes to the shapefile", message_level=3)
-- 
GitLab


From 334186635dfd168968c06ade4d81063e083778c1 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Thu, 5 Sep 2019 17:40:00 +0200
Subject: [PATCH 13/22] Solvents: WIP

---
 hermesv3_bu/sectors/solvents_sector.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index f3d6e17..ff5cb1f 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -187,11 +187,13 @@ class SolventsSector(Sector):
         spent_time = timeit.default_timer()
         # 1st Clip the raster
         self.logger.write_log("\t\tCreating clipped land use raster", message_level=3)
-        lu_raster_path = os.path.join(self.auxiliary_dir, 'solvents', 'lu_{0}.tif'.format('_'.join(land_uses)))
+        lu_raster_path = os.path.join(self.auxiliary_dir, 'solvents', 'lu_{0}.tif'.format(
+            '_'.join([str(x) for x in land_uses])))
+
         if self.comm.Get_rank() == 0:
             if not os.path.exists(lu_raster_path):
                 lu_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly(
-                    land_use_raster, self.clip.shapefile, lu_raster_path)
+                    land_use_raster, self.clip.shapefile, lu_raster_path, values=land_uses)
 
         # 2nd Raster to shapefile
         self.logger.write_log("\t\tRaster to shapefile", message_level=3)
@@ -201,16 +203,18 @@ class SolventsSector(Sector):
         self.logger.write_log("\t\tAdding nut codes to the shapefile", message_level=3)
         # if self.comm.Get_rank() == 0:
         land_use_shp.drop(columns='CELL_ID', inplace=True)
-        land_use_shp.rename(columns={'data': 'population'}, inplace=True)
+        land_use_shp.rename(columns={'data': 'land_use'}, inplace=True)
         land_use_shp = self.add_nut_code(land_use_shp, nut2_shapefile_path, nut_value='nuts2_id')
         land_use_shp = land_use_shp[land_use_shp['nut_code'] != -999]
+        land_use_shp = IoShapefile(self.comm).balance(land_use_shp)
         # land_use_shp = IoShapefile(self.comm).split_shapefile(land_use_shp)
 
         # 4th Calculate land_use percent
         self.logger.write_log("\t\tCalculating land use percentage on source resolution", message_level=3)
         land_use_by_nut2 = self.get_land_use_by_nut2(
             land_use_by_nut2_path, land_uses, np.unique(land_use_shp['nut_code']))
-        print(land_use_by_nut2)
+        print(land_use_shp)
+        sys.stdout.flush()
         exit()
         # 5th Calculate percent by dest_cell
         self.logger.write_log("\t\tCalculating land use percentage on destiny resolution", message_level=3)
@@ -234,7 +238,8 @@ class SolventsSector(Sector):
                                                           nut2_shapefile_path)
                     proxies_list.append(pop_proxy)
                 if proxy_name[:3] == 'lu_':
-                    land_uses = proxy_name.split('_')
+                    land_uses = [int(x) for x in proxy_name[3:].split('_')]
+
                     land_use_proxy = self.get_land_use_proxy(land_uses_raster_path, land_uses_nuts2_path, land_uses,
                                                              nut2_shapefile_path)
         else:
-- 
GitLab


From 57000da640aa1fdaf8c9918a88b8c916d77b8062 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Fri, 6 Sep 2019 12:25:00 +0200
Subject: [PATCH 14/22] Solvents: Land use proxy done!!

---
 conf/hermes.conf                       |  4 +--
 hermesv3_bu/sectors/sector.py          | 43 ++++++++++++++++++++++++++
 hermesv3_bu/sectors/solvents_sector.py | 37 +++++++++++++++++-----
 3 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/conf/hermes.conf b/conf/hermes.conf
index 550f74f..bd58a79 100755
--- a/conf/hermes.conf
+++ b/conf/hermes.conf
@@ -3,7 +3,7 @@ log_level = 3
 input_dir = /home/Earth/ctena/Models/hermesv3_bu_data
 data_path = /esarchive/recon
 output_dir = /scratch/Earth/HERMESv3_BU_OUT
-output_name = HERMESv3_<date>_aviation.nc
+output_name = HERMESv3_<date>_solvents.nc
 emission_summary = 0
 start_date = 2016/11/29 00:00:00
 # ----- end_date = start_date [DEFAULT] -----
@@ -142,7 +142,7 @@ recreational_boats_processors = 0
 point_sources_processors = 0
 traffic_processors = 0
 traffic_area_processors = 0
-solvents_processors = 1
+solvents_processors = 4
 
 
 [SHAPEFILES]
diff --git a/hermesv3_bu/sectors/sector.py b/hermesv3_bu/sectors/sector.py
index 1a94e23..b15a61a 100755
--- a/hermesv3_bu/sectors/sector.py
+++ b/hermesv3_bu/sectors/sector.py
@@ -564,3 +564,46 @@ class Sector(object):
         return_value = [outs for outs, ints in self.speciation_map.items() if ints == input_pollutant]
         self.logger.write_time_log('Sector', 'get_output_pollutants', timeit.default_timer() - spent_time)
         return return_value
+
+    def calculate_land_use_by_nut(self, land_use_raster_path, nut_shapefile_path, out_land_use_by_nut_path):
+        from hermesv3_bu.io_server.io_raster import IoRaster
+        from hermesv3_bu.io_server.io_shapefile import IoShapefile
+
+        # 1st Clip the raster
+        lu_raster_path = os.path.join(self.auxiliary_dir, 'clipped_land_use.tif')
+
+        if self.comm.Get_rank() == 0:
+            if not os.path.exists(lu_raster_path):
+                lu_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly(
+                    land_use_raster_path, self.clip.shapefile, lu_raster_path)
+
+        # 2nd Raster to shapefile
+        land_use_shp = IoRaster(self.comm).to_shapefile_parallel(lu_raster_path, gather=False, bcast=False)
+
+        # 3rd Add NUT code
+        land_use_shp.drop(columns='CELL_ID', inplace=True)
+        land_use_shp.rename(columns={'data': 'land_use'}, inplace=True)
+        land_use_shp = self.add_nut_code(land_use_shp, nut_shapefile_path, nut_value='nuts2_id')
+        land_use_shp = land_use_shp[land_use_shp['nut_code'] != -999]
+        land_use_shp = IoShapefile(self.comm).balance(land_use_shp)
+
+        # 4th Calculate land_use percent
+        land_use_shp['area'] = land_use_shp.geometry.area
+
+        land_use_by_nut = GeoDataFrame(index=pd.MultiIndex.from_product(
+            [np.unique(land_use_shp['nut_code']), np.unique(land_use_shp['land_use'])], names=['nuts2_id', 'land_use']))
+
+        for nut_code in np.unique(land_use_shp['nut_code']):
+            for land_use in np.unique(land_use_shp['land_use']):
+                land_use_by_nut.loc[(nut_code, land_use), 'area'] = land_use_shp.loc[
+                    (land_use_shp['land_use'] == land_use) & (land_use_shp['nut_code'] == nut_code),  'area'].sum()
+
+        land_use_by_nut.reset_index(inplace=True)
+        land_use_by_nut = IoShapefile(self.comm).gather_shapefile(land_use_by_nut, rank=0)
+
+        if self.comm.Get_rank() == 0:
+            land_use_by_nut = land_use_by_nut.groupby(['nuts2_id', 'land_use']).sum()
+            land_use_by_nut.to_csv(out_land_use_by_nut_path)
+            print('DONE -> {0}'.format(out_land_use_by_nut_path))
+        self.comm.Barrier()
+
diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index ff5cb1f..fc32df9 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -34,10 +34,10 @@ class SolventsSector(Sector):
 
         spent_time = timeit.default_timer()
         logger.write_log('===== SOLVENTS SECTOR =====')
+
         check_files([speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path,
                      weekly_profile_path, hourly_profile_path, proxies_map_path, yearly_emissions_by_nut2_path,
-                     point_sources_shapefile_path, population_raster_path,
-                     # population_nuts2_path,
+                     point_sources_shapefile_path, population_raster_path,population_nuts2_path,
                      land_uses_raster_path, land_uses_nuts2_path, nut2_shapefile_path])
 
         super(SolventsSector, self).__init__(
@@ -45,6 +45,9 @@ class SolventsSector(Sector):
             monthly_profile_path, weekly_profile_path, hourly_profile_path, speciation_map_path,
             speciation_profiles_path, molecular_weights_path)
 
+        # self.calculate_land_use_by_nut(land_uses_raster_path, nut2_shapefile_path, land_uses_nuts2_path)
+        # exit()
+
         self.proxies_map = self.read_proxies(proxies_map_path)
         self.check_profiles()
 
@@ -158,6 +161,7 @@ class SolventsSector(Sector):
         pop_shp.rename(columns={'data': 'population'}, inplace=True)
         pop_shp = self.add_nut_code(pop_shp, nut2_shapefile_path, nut_value='nuts2_id')
         pop_shp = pop_shp[pop_shp['nut_code'] != -999]
+        pop_shp = IoShapefile(self.comm).balance(pop_shp)
         # pop_shp = IoShapefile(self.comm).split_shapefile(pop_shp)
 
         # 4th Calculate population percent
@@ -178,7 +182,7 @@ class SolventsSector(Sector):
         pop_shp.drop(columns=['src_inter_fraction'], inplace=True)
 
         popu_dist = pop_shp.groupby(['FID', 'nut_code']).sum()
-        popu_dist.rename({'pop_percent': 'population'}, inplace=True)
+        popu_dist.rename(columns={'pop_percent': 'population'}, inplace=True)
 
         self.logger.write_time_log('SolventsSector', 'get_population_proxie', timeit.default_timer() - spent_time)
         return popu_dist
@@ -211,23 +215,39 @@ class SolventsSector(Sector):
 
         # 4th Calculate land_use percent
         self.logger.write_log("\t\tCalculating land use percentage on source resolution", message_level=3)
+
+        land_use_shp['area'] = land_use_shp.geometry.area
         land_use_by_nut2 = self.get_land_use_by_nut2(
             land_use_by_nut2_path, land_uses, np.unique(land_use_shp['nut_code']))
-        print(land_use_shp)
-        sys.stdout.flush()
-        exit()
+        land_use_shp.drop(columns=['land_use'], inplace=True)
+
+        land_use_shp['fraction'] = land_use_shp.apply(
+            lambda row: row['area'] / land_use_by_nut2.xs(row['nut_code'], level='nuts2_id').sum(), axis=1)
+        land_use_shp.drop(columns='area', inplace=True)
+        
         # 5th Calculate percent by dest_cell
         self.logger.write_log("\t\tCalculating land use percentage on destiny resolution", message_level=3)
 
+        land_use_shp.to_crs(self.grid.shapefile.crs, inplace=True)
+        land_use_shp['src_inter_fraction'] = land_use_shp.geometry.area
+        land_use_shp = self.spatial_overlays(land_use_shp.reset_index(), self.grid.shapefile.reset_index())
+        land_use_shp.drop(columns=['idx1', 'idx2', 'index'], inplace=True)
+        land_use_shp['src_inter_fraction'] = land_use_shp.geometry.area / land_use_shp['src_inter_fraction']
+        land_use_shp['fraction'] = land_use_shp['fraction'] * land_use_shp['src_inter_fraction']
+        land_use_shp.drop(columns=['src_inter_fraction'], inplace=True)
+
+        land_use_dist = land_use_shp.groupby(['FID', 'nut_code']).sum()
+        land_use_dist.rename(columns={'fraction': 'lu_{0}'.format('_'.join([str(x) for x in land_uses]))}, inplace=True)
+
         self.logger.write_time_log('SolventsSector', 'get_land_use_proxy', timeit.default_timer() - spent_time)
-        return True
+        return land_use_dist
 
     def get_proxy_shapefile(self, population_raster_path, population_nuts2_path, land_uses_raster_path,
                             land_uses_nuts2_path, nut2_shapefile_path):
         spent_time = timeit.default_timer()
         self.logger.write_log("Getting proxies shapefile")
         # proxy_names_list = np.unique(self.proxies_map['proxy_name'])
-        proxy_names_list = np.unique(['lu_3'])
+        proxy_names_list = np.unique(['lu_3_8'])
         proxy_shp_name = os.path.join(self.auxiliary_dir, 'solvents', 'proxy_distributions.shp')
         if not os.path.exists(proxy_shp_name):
             for proxy_name in proxy_names_list:
@@ -242,6 +262,7 @@ class SolventsSector(Sector):
 
                     land_use_proxy = self.get_land_use_proxy(land_uses_raster_path, land_uses_nuts2_path, land_uses,
                                                              nut2_shapefile_path)
+                    print(land_use_proxy)
         else:
             pass
 
-- 
GitLab


From 4a2d1bbf83bb15d72fb22c2a8c4225c23b93dcff Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Fri, 6 Sep 2019 17:23:54 +0200
Subject: [PATCH 15/22] Solvents: Proxies done!!!!

---
 conf/hermes.conf                       |   3 +-
 hermesv3_bu/config/config.py           |   1 +
 hermesv3_bu/sectors/sector_manager.py  |   6 +-
 hermesv3_bu/sectors/solvents_sector.py | 105 +++++++++++++++++++------
 4 files changed, 89 insertions(+), 26 deletions(-)

diff --git a/conf/hermes.conf b/conf/hermes.conf
index bd58a79..9a532d9 100755
--- a/conf/hermes.conf
+++ b/conf/hermes.conf
@@ -10,7 +10,7 @@ start_date = 2016/11/29 00:00:00
 # end_date = 2010/01/01 00:00:00
 output_timestep_num = 24
 auxiliary_files_path = /scratch/Earth/HERMESv3_BU_aux/test/<domain_type>_<resolution>
-erase_auxiliary_files = 1
+erase_auxiliary_files = 0
 
 
 [DOMAIN]
@@ -354,6 +354,7 @@ solvents_pollutants = mnvoc
 solvents_proxies_path = <input_dir>/solvents/proxies_profiles.csv
 solvents_yearly_emissions_by_nut2_path = <input_dir>/solvents/miteco_solvent_emissions_nuts2_2015.csv
 solvents_point_sources_shapefile = <input_dir>/solvents/use_solvents_point_sources.shp
+solvents_point_sources_weight_by_nut2_path = <input_dir>/solvents/point_sources_weights_nuts2.csv
 solvents_monthly_profile = <input_dir>/profiles/temporal/solvents/monthly_profiles.csv
 solvents_weekly_profile = <input_dir>/profiles/temporal/solvents/weekly_profiles.csv
 solvents_hourly_profile = <input_dir>/profiles/temporal/solvents/hourly_profiles.csv
diff --git a/hermesv3_bu/config/config.py b/hermesv3_bu/config/config.py
index 12fe53f..76a81f1 100755
--- a/hermesv3_bu/config/config.py
+++ b/hermesv3_bu/config/config.py
@@ -354,6 +354,7 @@ class Config(ArgParser):
         p.add_argument('--solvents_proxies_path', required=False, help='...')
         p.add_argument('--solvents_yearly_emissions_by_nut2_path', required=False, help='...')
         p.add_argument('--solvents_point_sources_shapefile', required=False, help='...')
+        p.add_argument('--solvents_point_sources_weight_by_nut2_path', required=False, help='...')
         p.add_argument('--solvents_monthly_profile', required=False, help='...')
         p.add_argument('--solvents_weekly_profile', required=False, help='...')
         p.add_argument('--solvents_hourly_profile', required=False, help='...')
diff --git a/hermesv3_bu/sectors/sector_manager.py b/hermesv3_bu/sectors/sector_manager.py
index 5813b71..a483196 100755
--- a/hermesv3_bu/sectors/sector_manager.py
+++ b/hermesv3_bu/sectors/sector_manager.py
@@ -213,9 +213,9 @@ class SectorManager(object):
                     arguments.solvents_speciation_profiles, arguments.solvents_monthly_profile,
                     arguments.solvents_weekly_profile, arguments.solvents_hourly_profile,
                     arguments.solvents_proxies_path, arguments.solvents_yearly_emissions_by_nut2_path,
-                    arguments.solvents_point_sources_shapefile, arguments.population_density_map,
-                    arguments.population_nuts2, arguments.land_uses_path, arguments.land_use_by_nuts2_path,
-                    arguments.nuts2_shapefile)
+                    arguments.solvents_point_sources_shapefile, arguments.solvents_point_sources_weight_by_nut2_path,
+                    arguments.population_density_map, arguments.population_nuts2, arguments.land_uses_path,
+                    arguments.land_use_by_nuts2_path, arguments.nuts2_shapefile)
 
             color += 1
 
diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index fc32df9..2a9a463 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -14,7 +14,7 @@ from hermesv3_bu.tools.checker import check_files, error_exit
 PROXY_NAMES = {'boat_building': 'boat',
                'automobile_manufacturing': 'automobile',
                'car_repairing': 'car_repair',
-               'dry_cleaning': 'gry_clean',
+               'dry_cleaning': 'dry_clean',
                'rubber_processing': 'rubber',
                'paints_manufacturing': 'paints',
                'inks_manufacturing': 'ink',
@@ -29,16 +29,16 @@ class SolventsSector(Sector):
     def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels,
                  speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path,
                  weekly_profile_path, hourly_profile_path, proxies_map_path, yearly_emissions_by_nut2_path,
-                 point_sources_shapefile_path, population_raster_path, population_nuts2_path, land_uses_raster_path,
-                 land_uses_nuts2_path, nut2_shapefile_path):
+                 point_sources_shapefile_path, point_sources_weight_by_nut2_path, population_raster_path,
+                 population_nuts2_path, land_uses_raster_path, land_uses_nuts2_path, nut2_shapefile_path):
 
         spent_time = timeit.default_timer()
         logger.write_log('===== SOLVENTS SECTOR =====')
 
         check_files([speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path,
                      weekly_profile_path, hourly_profile_path, proxies_map_path, yearly_emissions_by_nut2_path,
-                     point_sources_shapefile_path, population_raster_path,population_nuts2_path,
-                     land_uses_raster_path, land_uses_nuts2_path, nut2_shapefile_path])
+                     point_sources_shapefile_path, point_sources_weight_by_nut2_path, population_raster_path,
+                     population_nuts2_path, land_uses_raster_path, land_uses_nuts2_path, nut2_shapefile_path])
 
         super(SolventsSector, self).__init__(
             comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels,
@@ -51,11 +51,10 @@ class SolventsSector(Sector):
         self.proxies_map = self.read_proxies(proxies_map_path)
         self.check_profiles()
 
-        self.proxy = self.get_proxy_shapefile(population_raster_path, population_nuts2_path, land_uses_raster_path,
-                                              land_uses_nuts2_path, nut2_shapefile_path)
-
+        self.proxy = self.get_proxy_shapefile(
+            population_raster_path, population_nuts2_path, land_uses_raster_path, land_uses_nuts2_path,
+            nut2_shapefile_path, point_sources_shapefile_path, point_sources_weight_by_nut2_path)
         self.yearly_emissions = self.read_yearly_emissions(yearly_emissions_by_nut2_path)
-        exit()
         self.logger.write_time_log('SolventsSector', '__init__', timeit.default_timer() - spent_time)
 
     def read_proxies(self, path):
@@ -129,6 +128,20 @@ class SolventsSector(Sector):
         self.logger.write_time_log('SolventsSector', 'get_pop_by_nut2', timeit.default_timer() - spent_time)
         return pop_by_nut2
 
+    def get_point_sources_weights_by_nut2(self, path, proxy_name):
+        spent_time = timeit.default_timer()
+
+        weights_by_nut2 = pd.read_csv(path)
+        weights_by_nut2['nuts2_id'] = weights_by_nut2['nuts2_id'].astype(int)
+        weights_by_nut2 = weights_by_nut2[weights_by_nut2['industry_c'] == proxy_name]
+        weights_by_nut2.drop(columns=['industry_c'], inplace=True)
+        weights_by_nut2.set_index("nuts2_id", inplace=True)
+        weights_by_nut2 = weights_by_nut2.to_dict()['weight']
+
+        self.logger.write_time_log('SolventsSector', 'get_point_sources_weights_by_nut2',
+                                   timeit.default_timer() - spent_time)
+        return weights_by_nut2
+
     def get_land_use_by_nut2(self, path, land_uses, nut_codes):
         spent_time = timeit.default_timer()
 
@@ -242,29 +255,77 @@ class SolventsSector(Sector):
         self.logger.write_time_log('SolventsSector', 'get_land_use_proxy', timeit.default_timer() - spent_time)
         return land_use_dist
 
+    def get_point_shapefile_proxy(self, proxy_name, point_shapefile_path, point_sources_weight_by_nut2_path,
+                                  nut2_shapefile_path):
+        spent_time = timeit.default_timer()
+
+        point_shapefile = IoShapefile(self.comm).read_shapefile_parallel(point_shapefile_path)
+        point_shapefile.drop(columns=['Empresa', 'Empleados', 'Ingresos', 'Consumos', 'LON', 'LAT'], inplace=True)
+        point_shapefile = point_shapefile[point_shapefile['industry_c'] ==
+                                          [key for key, value in PROXY_NAMES.items() if value == proxy_name][0]]
+        point_shapefile = IoShapefile(self.comm).balance(point_shapefile)
+        point_shapefile.drop(columns=['industry_c'], inplace=True)
+        point_shapefile = self.add_nut_code(point_shapefile, nut2_shapefile_path, nut_value='nuts2_id')
+        point_shapefile = point_shapefile[point_shapefile['nut_code'] != -999]
+
+        point_shapefile = IoShapefile(self.comm).gather_shapefile(point_shapefile, rank=0)
+        if self.comm.Get_rank() == 0:
+            weight_by_nut2 = self.get_point_sources_weights_by_nut2(
+                point_sources_weight_by_nut2_path,
+                [key for key, value in PROXY_NAMES.items() if value == proxy_name][0])
+            point_shapefile[proxy_name] = point_shapefile.apply(
+                lambda row: row['weight'] / weight_by_nut2[row['nut_code']], axis=1)
+            point_shapefile.drop(columns=['weight'], inplace=True)
+            # print(point_shapefile.groupby('nut_code')['weight'].sum())
+
+        point_shapefile = IoShapefile(self.comm).split_shapefile(point_shapefile)
+        point_shapefile = gpd.sjoin(point_shapefile.to_crs(self.grid.shapefile.crs), self.grid.shapefile.reset_index())
+        point_shapefile.drop(columns=['geometry', 'index_right'], inplace=True)
+        point_shapefile = point_shapefile.groupby(['FID', 'nut_code']).sum()
+
+        self.logger.write_time_log('SolventsSector', 'get_point_shapefile_proxy', timeit.default_timer() - spent_time)
+        return point_shapefile
+
     def get_proxy_shapefile(self, population_raster_path, population_nuts2_path, land_uses_raster_path,
-                            land_uses_nuts2_path, nut2_shapefile_path):
+                            land_uses_nuts2_path, nut2_shapefile_path, point_sources_shapefile_path,
+                            point_sources_weight_by_nut2_path):
         spent_time = timeit.default_timer()
         self.logger.write_log("Getting proxies shapefile")
-        # proxy_names_list = np.unique(self.proxies_map['proxy_name'])
-        proxy_names_list = np.unique(['lu_3_8'])
-        proxy_shp_name = os.path.join(self.auxiliary_dir, 'solvents', 'proxy_distributions.shp')
-        if not os.path.exists(proxy_shp_name):
+        proxy_names_list = np.unique(self.proxies_map['proxy_name'])
+        proxy_path = os.path.join(self.auxiliary_dir, 'solvents', 'proxy_distributions.csv')
+        if not os.path.exists(proxy_path):
+            proxy_list = []
             for proxy_name in proxy_names_list:
                 self.logger.write_log("\tGetting proxy for {0}".format(proxy_name), message_level=2)
-                proxies_list = []
                 if proxy_name == 'population':
-                    pop_proxy = self.get_population_proxy(population_raster_path, population_nuts2_path,
+                    proxy = self.get_population_proxy(population_raster_path, population_nuts2_path,
                                                           nut2_shapefile_path)
-                    proxies_list.append(pop_proxy)
-                if proxy_name[:3] == 'lu_':
+                elif proxy_name[:3] == 'lu_':
                     land_uses = [int(x) for x in proxy_name[3:].split('_')]
 
-                    land_use_proxy = self.get_land_use_proxy(land_uses_raster_path, land_uses_nuts2_path, land_uses,
+                    proxy = self.get_land_use_proxy(land_uses_raster_path, land_uses_nuts2_path, land_uses,
                                                              nut2_shapefile_path)
-                    print(land_use_proxy)
+                else:
+                    proxy = self.get_point_shapefile_proxy(proxy_name, point_sources_shapefile_path,
+                                                           point_sources_weight_by_nut2_path, nut2_shapefile_path)
+                proxy = IoShapefile(self.comm).gather_shapefile(proxy.reset_index())
+                if self.comm.Get_rank() == 0:
+                    proxy_list.append(proxy)
+            if self.comm.Get_rank() == 0:
+                proxies = pd.concat(proxy_list, sort=False)
+                proxies['FID'] = proxies['FID'].astype(int)
+                proxies['nut_code'] = proxies['nut_code'].astype(int)
+                proxies = proxies.groupby(['FID', 'nut_code']).sum()
+                proxies.to_csv(proxy_path)
+            else:
+                proxies = None
+            proxies = self.comm.bcast(proxies, root=0)
         else:
-            pass
+            proxies = pd.read_csv(proxy_path, index_col=['FID', 'nut_code'])
 
         self.logger.write_time_log('SolventsSector', 'get_proxy_shapefile', timeit.default_timer() - spent_time)
-        return True
+        return proxies
+
+    def calculate_emissions(self):
+        print('HOLAAA')
+        exit()
-- 
GitLab


From e8128f1bf4701221558315d07d1b9b2094c4d6f0 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Fri, 6 Sep 2019 18:00:25 +0200
Subject: [PATCH 16/22] Solvents: Proxies as shapefile

---
 hermesv3_bu/sectors/solvents_sector.py | 54 +++++++++++++++++++++-----
 1 file changed, 44 insertions(+), 10 deletions(-)

diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index 2a9a463..39433f8 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -10,6 +10,8 @@ from hermesv3_bu.sectors.sector import Sector
 from hermesv3_bu.io_server.io_shapefile import IoShapefile
 from hermesv3_bu.io_server.io_raster import IoRaster
 from hermesv3_bu.tools.checker import check_files, error_exit
+from pandas import DataFrame
+from geopandas import GeoDataFrame
 
 PROXY_NAMES = {'boat_building': 'boat',
                'automobile_manufacturing': 'automobile',
@@ -54,7 +56,8 @@ class SolventsSector(Sector):
         self.proxy = self.get_proxy_shapefile(
             population_raster_path, population_nuts2_path, land_uses_raster_path, land_uses_nuts2_path,
             nut2_shapefile_path, point_sources_shapefile_path, point_sources_weight_by_nut2_path)
-        self.yearly_emissions = self.read_yearly_emissions(yearly_emissions_by_nut2_path)
+
+        self.yearly_emissions_path = yearly_emissions_by_nut2_path
         self.logger.write_time_log('SolventsSector', '__init__', timeit.default_timer() - spent_time)
 
     def read_proxies(self, path):
@@ -108,10 +111,11 @@ class SolventsSector(Sector):
         self.logger.write_time_log('SolventsSector', 'check_profiles', timeit.default_timer() - spent_time)
         return True
 
-    def read_yearly_emissions(self, path):
+    def read_yearly_emissions(self, path, nut_list):
         spent_time = timeit.default_timer()
 
-        year_emis = pd.read_csv(path, dtype={'nuts2_id': str, 'snap': str, 'nmvoc': np.float64})
+        year_emis = pd.read_csv(path, dtype={'nuts2_id': int, 'snap': str, 'nmvoc': np.float64})
+        year_emis = year_emis[year_emis['nuts2_id'].isin(nut_list)]
         year_emis.set_index(['nuts2_id', 'snap'], inplace=True)
         year_emis.drop(columns=['gnfr_description', 'gnfr', 'snap_description', 'nuts2_na'], inplace=True)
 
@@ -289,22 +293,34 @@ class SolventsSector(Sector):
     def get_proxy_shapefile(self, population_raster_path, population_nuts2_path, land_uses_raster_path,
                             land_uses_nuts2_path, nut2_shapefile_path, point_sources_shapefile_path,
                             point_sources_weight_by_nut2_path):
+        """
+
+        :param population_raster_path:
+        :param population_nuts2_path:
+        :param land_uses_raster_path:
+        :param land_uses_nuts2_path:
+        :param nut2_shapefile_path:
+        :param point_sources_shapefile_path:
+        :param point_sources_weight_by_nut2_path:
+        :return:
+        :rtype: DataFrame
+        """
         spent_time = timeit.default_timer()
         self.logger.write_log("Getting proxies shapefile")
         proxy_names_list = np.unique(self.proxies_map['proxy_name'])
-        proxy_path = os.path.join(self.auxiliary_dir, 'solvents', 'proxy_distributions.csv')
+        proxy_path = os.path.join(self.auxiliary_dir, 'solvents', 'proxy_distributions.shp')
         if not os.path.exists(proxy_path):
             proxy_list = []
             for proxy_name in proxy_names_list:
                 self.logger.write_log("\tGetting proxy for {0}".format(proxy_name), message_level=2)
                 if proxy_name == 'population':
                     proxy = self.get_population_proxy(population_raster_path, population_nuts2_path,
-                                                          nut2_shapefile_path)
+                                                      nut2_shapefile_path)
                 elif proxy_name[:3] == 'lu_':
                     land_uses = [int(x) for x in proxy_name[3:].split('_')]
 
                     proxy = self.get_land_use_proxy(land_uses_raster_path, land_uses_nuts2_path, land_uses,
-                                                             nut2_shapefile_path)
+                                                    nut2_shapefile_path)
                 else:
                     proxy = self.get_point_shapefile_proxy(proxy_name, point_sources_shapefile_path,
                                                            point_sources_weight_by_nut2_path, nut2_shapefile_path)
@@ -316,16 +332,34 @@ class SolventsSector(Sector):
                 proxies['FID'] = proxies['FID'].astype(int)
                 proxies['nut_code'] = proxies['nut_code'].astype(int)
                 proxies = proxies.groupby(['FID', 'nut_code']).sum()
-                proxies.to_csv(proxy_path)
+                proxies = GeoDataFrame(proxies)
+                # print(self.grid.shapefile.loc[proxies.index.get_level_values('FID'), 'geometry'].values)
+                # exit()
+                proxies = GeoDataFrame(
+                    proxies, geometry=self.grid.shapefile.loc[proxies.index.get_level_values('FID'), 'geometry'].values,
+                    crs=self.grid.shapefile.crs)
+                IoShapefile(self.comm).write_shapefile_serial(proxies.reset_index(), proxy_path)
             else:
                 proxies = None
-            proxies = self.comm.bcast(proxies, root=0)
         else:
-            proxies = pd.read_csv(proxy_path, index_col=['FID', 'nut_code'])
+            if self.comm.Get_rank() == 0:
+                proxies = IoShapefile(self.comm).read_shapefile_serial(proxy_path)
+                proxies.set_index(['FID', 'nut_code'], inplace=True)
+            else:
+                proxies = None
+        proxies = self.comm.bcast(proxies, root=0)
 
         self.logger.write_time_log('SolventsSector', 'get_proxy_shapefile', timeit.default_timer() - spent_time)
         return proxies
 
+    def calculate_hourly_emissions(self, yearly_emissions):
+        print(self.add_dates(yearly_emissions))
+        exit()
+
     def calculate_emissions(self):
-        print('HOLAAA')
+        # Distribute emissions first.
+        # yearly_emissions = IoShapefile(self.comm).split_shapefile(
+        #     self.read_yearly_emissions(self.yearly_emissions_path,
+        #                                np.unique(self.proxy.index.get_level_values('nut_code'))))
+        # hourly_emissions = self.calculate_hourly_emissions(yearly_emissions)
         exit()
-- 
GitLab


From 77620284a4e4ca347fc9ea841b2a49e68590f0be Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Fri, 6 Sep 2019 18:05:13 +0200
Subject: [PATCH 17/22] PEP8 Corrections

---
 hermesv3_bu/sectors/sector.py          | 1 -
 hermesv3_bu/sectors/solvents_sector.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/hermesv3_bu/sectors/sector.py b/hermesv3_bu/sectors/sector.py
index b15a61a..0427e7b 100755
--- a/hermesv3_bu/sectors/sector.py
+++ b/hermesv3_bu/sectors/sector.py
@@ -606,4 +606,3 @@ class Sector(object):
             land_use_by_nut.to_csv(out_land_use_by_nut_path)
             print('DONE -> {0}'.format(out_land_use_by_nut_path))
         self.comm.Barrier()
-
diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index 39433f8..178e065 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -241,7 +241,7 @@ class SolventsSector(Sector):
         land_use_shp['fraction'] = land_use_shp.apply(
             lambda row: row['area'] / land_use_by_nut2.xs(row['nut_code'], level='nuts2_id').sum(), axis=1)
         land_use_shp.drop(columns='area', inplace=True)
-        
+
         # 5th Calculate percent by dest_cell
         self.logger.write_log("\t\tCalculating land use percentage on destiny resolution", message_level=3)
 
-- 
GitLab


From 2fed728d806951ab54237a50ab19120e016dbdc5 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Mon, 9 Sep 2019 18:26:34 +0200
Subject: [PATCH 18/22] Solvents: Testing (1 year sim)

---
 conf/hermes.conf                       |  12 +--
 hermesv3_bu/sectors/solvents_sector.py | 111 +++++++++++++++++++++++--
 2 files changed, 109 insertions(+), 14 deletions(-)

diff --git a/conf/hermes.conf b/conf/hermes.conf
index 9a532d9..bb3de1d 100755
--- a/conf/hermes.conf
+++ b/conf/hermes.conf
@@ -10,7 +10,7 @@ start_date = 2016/11/29 00:00:00
 # end_date = 2010/01/01 00:00:00
 output_timestep_num = 24
 auxiliary_files_path = /scratch/Earth/HERMESv3_BU_aux/test/<domain_type>_<resolution>
-erase_auxiliary_files = 0
+erase_auxiliary_files = 1
 
 
 [DOMAIN]
@@ -55,10 +55,10 @@ vertical_description = <input_dir>/profiles/vertical/MONARCH_Global_48layers_ver
     #y_0 = 43862.90625
 
     # CATALUNYA test
-    #nx = 28
-    #ny = 30
-    nx = 4
-    ny = 4
+    nx = 28
+    ny = 30
+    #nx = 4
+    #ny = 4
     inc_x = 10000
     inc_y = 10000
     x_0 = 253151.59375
@@ -350,7 +350,7 @@ small_cities_weekly_profile = <input_dir>/profiles/temporal/traffic_area/small_c
 small_cities_hourly_profile = <input_dir>/profiles/temporal/traffic_area/small_cities_hourly_profiles.csv
 
 [SOLVENTS]
-solvents_pollutants = mnvoc
+solvents_pollutants = nmvoc
 solvents_proxies_path = <input_dir>/solvents/proxies_profiles.csv
 solvents_yearly_emissions_by_nut2_path = <input_dir>/solvents/miteco_solvent_emissions_nuts2_2015.csv
 solvents_point_sources_shapefile = <input_dir>/solvents/use_solvents_point_sources.shp
diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index 178e065..0a4cb4f 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -6,6 +6,7 @@ import timeit
 import geopandas as gpd
 import pandas as pd
 import numpy as np
+from warnings import warn
 from hermesv3_bu.sectors.sector import Sector
 from hermesv3_bu.io_server.io_shapefile import IoShapefile
 from hermesv3_bu.io_server.io_raster import IoRaster
@@ -347,19 +348,113 @@ class SolventsSector(Sector):
                 proxies.set_index(['FID', 'nut_code'], inplace=True)
             else:
                 proxies = None
-        proxies = self.comm.bcast(proxies, root=0)
+        proxies = IoShapefile(self.comm).split_shapefile(proxies)
 
         self.logger.write_time_log('SolventsSector', 'get_proxy_shapefile', timeit.default_timer() - spent_time)
         return proxies
 
     def calculate_hourly_emissions(self, yearly_emissions):
-        print(self.add_dates(yearly_emissions))
-        exit()
+        def get_mf(df):
+            month_factor = self.monthly_profiles.loc[df.name[1], df.name[0]]
+
+            df['MF'] = month_factor
+            return df.loc[:, ['MF']]
+
+        def get_wf(df):
+            weekly_profile = self.calculate_rebalanced_weekly_profile(self.weekly_profiles.loc[df.name[1], :].to_dict(),
+                                                                      df.name[0])
+            df['WF'] = weekly_profile[df.name[0].weekday()]
+            return df.loc[:, ['WF']]
+
+        def get_hf(df):
+            hourly_profile = self.hourly_profiles.loc[df.name[1], :].to_dict()
+            hour_factor = hourly_profile[df.name[0]]
+
+            df['HF'] = hour_factor
+            return df.loc[:, ['HF']]
+
+        emissions = self.add_dates(yearly_emissions, drop_utc=True)
+        
+        emissions['month'] = emissions['date'].dt.month
+        emissions['weekday'] = emissions['date'].dt.weekday
+        emissions['hour'] = emissions['date'].dt.hour
+        emissions['date_as_date'] = emissions['date'].dt.date
+
+        emissions['MF'] = emissions.groupby(['month', 'P_month']).apply(get_mf)
+        emissions['WF'] = emissions.groupby(['date_as_date', 'P_week']).apply(get_wf)
+        emissions['HF'] = emissions.groupby(['hour', 'P_hour']).apply(get_hf)
+
+        emissions['temp_factor'] = emissions['MF'] * emissions['WF'] * emissions['HF']
+        emissions.drop(columns=['MF', 'P_month', 'month', 'WF', 'P_week', 'weekday', 'HF', 'P_hour', 'hour', 'date',
+                                'date_as_date'], inplace=True)
+        emissions['nmvoc'] = emissions['nmvoc'] * emissions['temp_factor']
+        emissions.drop(columns=['temp_factor'], inplace=True)
+        emissions.set_index(['FID', 'snap', 'tstep'], inplace=True)
+
+        # emissions = IoShapefile(self.comm).balance(emissions)
+        
+        return emissions
+
+    def distribute_yearly_emissions(self):
+        yearly_emis = self.read_yearly_emissions(
+            self.yearly_emissions_path, np.unique(self.proxy.index.get_level_values('nut_code')))
+        year_nuts = np.unique(yearly_emis.index.get_level_values('nuts2_id'))
+        proxy_nuts = np.unique(self.proxy.index.get_level_values('nut_code'))
+        unknow_nuts = list(set(proxy_nuts) - set(year_nuts))
+        if len(unknow_nuts) > 0:
+            warn("*WARNING* The {0} nuts2_id have no emissions in the solvents_yearly_emissions_by_nut2_path.".format(
+                str(unknow_nuts)))
+            self.proxy.drop(unknow_nuts, level='nut_code', inplace=True)
+        emis_list = []
+        for snap, snap_df in self.proxies_map.iterrows():
+            emis = self.proxy.reset_index()
+            emis['snap'] = snap
+            emis['P_month'] = snap_df['P_month']
+            emis['P_week'] = snap_df['P_week']
+            emis['P_hour'] = snap_df['P_hour']
+            emis['P_spec'] = snap_df['P_spec']
+            # print(yearly_emis.index)
+            # print(yearly_emis.loc[(9, '060408'), 'nmvoc'])
+            # exit()
+            emis['nmvoc'] = emis.apply(lambda row: yearly_emis.loc[(row['nut_code'], snap), 'nmvoc'] * row[
+                self.proxies_map.loc[snap, 'proxy_name']], axis=1)
+
+            emis.set_index(['FID', 'snap'], inplace=True)
+            emis_list.append(emis[['P_month', 'P_week', 'P_hour', 'P_spec', 'nmvoc', 'geometry']])
+        emis = pd.concat(emis_list).sort_index()
+        emis = emis[emis['nmvoc'] > 0]
+        # emis = IoShapefile(self.comm).balance(emis)
+        return emis
+
+    def speciate(self, dataframe, code='default'):
+
+        def calculate_new_pollutant(x, out_p):
+            sys.stdout.flush()
+            profile = self.speciation_profile.loc[x.name, ['VOCtoTOG', out_p]]
+            x[out_p] = x['nmvoc'] * (profile['VOCtoTOG'] * profile[out_p])
+            return x[[out_p]]
+
+        new_dataframe = gpd.GeoDataFrame(index=dataframe.index, data=None, crs=dataframe.crs,
+                                         geometry=dataframe.geometry)
+        for out_pollutant in self.output_pollutants:
+            new_dataframe[out_pollutant] = dataframe.groupby('P_spec').apply(
+                lambda x: calculate_new_pollutant(x, out_pollutant))
+        new_dataframe.reset_index(inplace=True)
+
+        new_dataframe.drop(columns=['snap', 'geometry'], inplace=True)
+        new_dataframe.set_index(['FID', 'tstep'], inplace=True)
+        return new_dataframe
 
     def calculate_emissions(self):
         # Distribute emissions first.
-        # yearly_emissions = IoShapefile(self.comm).split_shapefile(
-        #     self.read_yearly_emissions(self.yearly_emissions_path,
-        #                                np.unique(self.proxy.index.get_level_values('nut_code'))))
-        # hourly_emissions = self.calculate_hourly_emissions(yearly_emissions)
-        exit()
+        emissions = self.distribute_yearly_emissions()
+        emissions = self.calculate_hourly_emissions(emissions)
+        emissions.to_csv('/gpfs/projects/bsc32/bsc32538/DATA/HERMESv3_BU_OUT/logs/solvents1_rank{0}.csv'.format(self.comm.Get_rank()))
+        emissions = self.speciate(emissions)
+        emissions.to_csv('/gpfs/projects/bsc32/bsc32538/DATA/HERMESv3_BU_OUT/logs/solvents2_rank{0}.csv'.format(self.comm.Get_rank()))
+
+        emissions.reset_index(inplace=True)
+        emissions['layer'] = 0
+        emissions = emissions.groupby(['FID', 'layer', 'tstep']).sum()
+        emissions.to_csv('/gpfs/projects/bsc32/bsc32538/DATA/HERMESv3_BU_OUT/logs/solvents3_rank{0}.csv'.format(self.comm.Get_rank()))
+        return emissions
-- 
GitLab


From 5997a721620d99f9278f051d932613ee158cc9ea Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Tue, 10 Sep 2019 10:28:42 +0200
Subject: [PATCH 19/22] PEP8 corrections

---
 hermesv3_bu/sectors/solvents_sector.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index 0a4cb4f..2f7816b 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -374,7 +374,7 @@ class SolventsSector(Sector):
             return df.loc[:, ['HF']]
 
         emissions = self.add_dates(yearly_emissions, drop_utc=True)
-        
+
         emissions['month'] = emissions['date'].dt.month
         emissions['weekday'] = emissions['date'].dt.weekday
         emissions['hour'] = emissions['date'].dt.hour
@@ -391,8 +391,6 @@ class SolventsSector(Sector):
         emissions.drop(columns=['temp_factor'], inplace=True)
         emissions.set_index(['FID', 'snap', 'tstep'], inplace=True)
 
-        # emissions = IoShapefile(self.comm).balance(emissions)
-        
         return emissions
 
     def distribute_yearly_emissions(self):
@@ -449,12 +447,9 @@ class SolventsSector(Sector):
         # Distribute emissions first.
         emissions = self.distribute_yearly_emissions()
         emissions = self.calculate_hourly_emissions(emissions)
-        emissions.to_csv('/gpfs/projects/bsc32/bsc32538/DATA/HERMESv3_BU_OUT/logs/solvents1_rank{0}.csv'.format(self.comm.Get_rank()))
         emissions = self.speciate(emissions)
-        emissions.to_csv('/gpfs/projects/bsc32/bsc32538/DATA/HERMESv3_BU_OUT/logs/solvents2_rank{0}.csv'.format(self.comm.Get_rank()))
 
         emissions.reset_index(inplace=True)
         emissions['layer'] = 0
         emissions = emissions.groupby(['FID', 'layer', 'tstep']).sum()
-        emissions.to_csv('/gpfs/projects/bsc32/bsc32538/DATA/HERMESv3_BU_OUT/logs/solvents3_rank{0}.csv'.format(self.comm.Get_rank()))
         return emissions
-- 
GitLab


From 77d550a1556a09483f90106c47d2fed4e146a28b Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Tue, 10 Sep 2019 18:16:36 +0200
Subject: [PATCH 20/22] Solvents: Documenting

---
 hermesv3_bu/sectors/solvents_sector.py | 326 +++++++++++++++++++++++--
 1 file changed, 307 insertions(+), 19 deletions(-)

diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index 2f7816b..cd2843f 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -13,6 +13,9 @@ from hermesv3_bu.io_server.io_raster import IoRaster
 from hermesv3_bu.tools.checker import check_files, error_exit
 from pandas import DataFrame
 from geopandas import GeoDataFrame
+from hermesv3_bu.grids.grid import Grid
+from hermesv3_bu.logger.log import Log
+from hermesv3_bu.clipping.clip import Clip
 
 PROXY_NAMES = {'boat_building': 'boat',
                'automobile_manufacturing': 'automobile',
@@ -29,12 +32,104 @@ PROXY_NAMES = {'boat_building': 'boat',
 
 
 class SolventsSector(Sector):
+    """
+    Solvents sector allows to calculate the solvents emissions.
+
+    It first calculates the horizontal distribution for the different sources and store them in an auxiliary file
+    during the initialization part.
+
+    Once the initialization is finished it distribute the emissions of the different sub sectors ont he grid to start
+    the temporal disaggregation.
+    """
     def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels,
                  speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path,
                  weekly_profile_path, hourly_profile_path, proxies_map_path, yearly_emissions_by_nut2_path,
                  point_sources_shapefile_path, point_sources_weight_by_nut2_path, population_raster_path,
                  population_nuts2_path, land_uses_raster_path, land_uses_nuts2_path, nut2_shapefile_path):
+        """
+        :param comm: Communicator for the sector calculation.
+        :type comm: MPI.COMM
+
+        :param logger: Logger
+        :type logger: Log
+
+        :param auxiliary_dir: Path to the directory where the necessary auxiliary files will be created if them are not
+            created yet.
+        :type auxiliary_dir: str
+
+        :param grid: Grid object.
+        :type grid: Grid
+
+        :param clip: Clip object
+        :type clip: Clip
+
+        :param date_array: List of datetimes.
+        :type date_array: list(datetime.datetime, ...)
+
+        :param source_pollutants: List of input pollutants to take into account.
+        :type source_pollutants: list
+
+        :param vertical_levels: List of top level of each vertical layer.
+        :type vertical_levels: list
+
+        :param speciation_map_path: Path to the CSV file that contains the speciation map. The CSV file must contain
+            the following columns [dst, src, description]
+            The 'dst' column will be used as output pollutant list and the 'src' column as their onw input pollutant
+            to be used as a fraction in the speciation profiles.
+        :type speciation_map_path: str
+
+        :param molecular_weights_path: Path to the CSV file that contains all the molecular weights needed. The CSV
+            file must contain the 'Specie' and 'MW' columns.
+        :type molecular_weights_path: str
+
+        :param speciation_profiles_path: Path to the file that contains all the speciation profiles. The CSV file
+            must contain the "Code" column with the value of each animal of the animal_list. The rest of columns
+            have to be the sames as the column 'dst' of the 'speciation_map_path' file.
+        :type speciation_profiles_path: str
+
+        :param hourly_profile_path: Path to the CSV file that contains all the monthly profiles. The CSV file must
+            contain the following columns [P_month, January, February, ..., November, December]
+            The P_month code have to match with the proxies_map_path file.
+        :type hourly_profile_path: str
+
+        :param weekly_profile_path: Path to the CSV file that contains all the weekly profiles. The CSV file must
+            contain the following columns [P_week, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday]
+            The P_week code have to match with the proxies_map_path file.
+        :type weekly_profile_path: str
 
+        :param hourly_profile_path: Path to the CSV file that contains all the hourly profiles. The CSV file must
+            contain the following columns [P_hour, 0, 1, 2, 3, ..., 22, 23]
+            The P_week code have to match with the proxies_map_path file.
+        :type hourly_profile_path: str
+
+        :param proxies_map_path: Path to the CSV file that contains the proxies map.
+        :type proxies_map_path: str
+
+        :param yearly_emissions_by_nut2_path: Path to the CSV file that contains the yearly emissions by subsecotr and
+            nuts2 level.
+        :type yearly_emissions_by_nut2_path: str
+
+        :param point_sources_shapefile_path: Path to the shapefile that contains the point sources for solvents.
+        :type point_sources_shapefile_path: str
+
+        :param point_sources_weight_by_nut2_path: Path to the CSV file that contains the weight for each proxy and nut2.
+        :type point_sources_weight_by_nut2_path: str
+
+        :param population_raster_path: Path to the population raster.
+        :type population_raster_path: str
+
+        :param population_nuts2_path: Path to the CSV file that contains the amount of population for each nut2.
+        :type population_nuts2_path: str
+
+        :param land_uses_raster_path: Path to the land use raster.
+        :type land_uses_raster_path: str
+
+        :param land_uses_nuts2_path: Path to the CSV file that contains the amount of land use for each nut2.
+        :type land_uses_nuts2_path: str
+
+        :param nut2_shapefile_path: Path to the shapefile that contains the nut2.
+        :type nut2_shapefile_path: str
+        """
         spent_time = timeit.default_timer()
         logger.write_log('===== SOLVENTS SECTOR =====')
 
@@ -48,9 +143,6 @@ class SolventsSector(Sector):
             monthly_profile_path, weekly_profile_path, hourly_profile_path, speciation_map_path,
             speciation_profiles_path, molecular_weights_path)
 
-        # self.calculate_land_use_by_nut(land_uses_raster_path, nut2_shapefile_path, land_uses_nuts2_path)
-        # exit()
-
         self.proxies_map = self.read_proxies(proxies_map_path)
         self.check_profiles()
 
@@ -62,6 +154,18 @@ class SolventsSector(Sector):
         self.logger.write_time_log('SolventsSector', '__init__', timeit.default_timer() - spent_time)
 
     def read_proxies(self, path):
+        """
+        Read the proxy map.
+
+        It will filter the CONS == '1' snaps and add the 'spatial_proxy' column that the content will match with some
+        column of the proxy shapefile.
+
+        :param path: path to the CSV file that have the proxy map.
+        :type path: str
+
+        :return: Proxy map.
+        :rtype: DataFrame
+        """
         spent_time = timeit.default_timer()
         proxies_df = pd.read_csv(path, dtype=str)
 
@@ -79,6 +183,20 @@ class SolventsSector(Sector):
         return proxies_df
 
     def check_profiles(self):
+        """
+        Check that the profiles appear on the profile files.
+
+        It will check the content of the proxies map.
+        Check that the 'P_month' content appears on the monthly profiles.
+        Check that the 'P_week' content appears on the weekly profiles.
+        Check that the 'P_hour' content appears on the hourly profiles.
+        Check that the 'P_spec' content appears on the speciation profiles.
+
+        It will stop teh execution if the requirements are not satisfied.
+
+        :return: True when everything is OK.
+        :rtype: bool
+        """
         spent_time = timeit.default_timer()
         # Checking monthly profiles IDs
         links_month = set(np.unique(self.proxies_map['P_month'].dropna().values))
@@ -113,6 +231,20 @@ class SolventsSector(Sector):
         return True
 
     def read_yearly_emissions(self, path, nut_list):
+        """
+        Read the yearly emission by snap and nuts2.
+
+        Select only the nuts2 IDs that appear in the selected domain.
+
+        :param path: Path to the CSV file that contains the yearly emissions by snap and nuts2.
+        :type path: str
+
+        :param nut_list: List of nut codes
+        :type nut_list: list
+
+        :return: Dataframe with thew amount of NMVOC for each snap and nut2
+        :rtype: DataFrame
+        """
         spent_time = timeit.default_timer()
 
         year_emis = pd.read_csv(path, dtype={'nuts2_id': int, 'snap': str, 'nmvoc': np.float64})
@@ -124,6 +256,15 @@ class SolventsSector(Sector):
         return year_emis
 
     def get_population_by_nut2(self, path):
+        """
+        Read the CSV file that contains the amount of population by nut2.
+
+        :param path: Path to the CSV file that contains the amount of population by nut2.
+        :type path: str
+
+        :return: Dataframe with the amount of population by nut2.
+        :rtype: DataFrame
+        """
         spent_time = timeit.default_timer()
 
         pop_by_nut2 = pd.read_csv(path)
@@ -134,6 +275,18 @@ class SolventsSector(Sector):
         return pop_by_nut2
 
     def get_point_sources_weights_by_nut2(self, path, proxy_name):
+        """
+        Read the CSV file that contains the amount of weight by industry and nut2.
+        
+        :param path: Path to the CSV file that contains the amount of weight by industry and nut2.
+        :type path: str
+        
+        :param proxy_name: Proxy to calculate.
+        :type proxy_name: str
+        
+        :return: Dataframe with the amount of weight by industry and nut2.
+        :rtype: DataFrame
+        """
         spent_time = timeit.default_timer()
 
         weights_by_nut2 = pd.read_csv(path)
@@ -148,6 +301,21 @@ class SolventsSector(Sector):
         return weights_by_nut2
 
     def get_land_use_by_nut2(self, path, land_uses, nut_codes):
+        """
+        Read the CSV file that contains the amount of land use by nut2.
+
+        :param path: Path to the CSV file that contains the amount of land use by nut2.
+        :type path: str
+
+        :param land_uses: List of land uses to take into account.
+        :type land_uses: list
+
+        :param nut_codes: List of nut2 codes to take into account.
+        :type nut_codes: list
+
+        :return: DataFrame with the amount of land use by nut2.
+        :rtype: DataFrame
+        """
         spent_time = timeit.default_timer()
 
         land_use_by_nut2 = pd.read_csv(path)
@@ -159,6 +327,21 @@ class SolventsSector(Sector):
         return land_use_by_nut2
 
     def get_population_proxy(self, pop_raster_path, pop_by_nut2_path, nut2_shapefile_path):
+        """
+        Calculate the distribution based on the amount of population.
+
+        :param pop_raster_path: Path to the raster file that contains the population information.
+        :type pop_raster_path: str
+
+        :param pop_by_nut2_path: Path to the CSV file that contains the amount of population by nut2.
+        :type pop_by_nut2_path: str
+
+        :param nut2_shapefile_path: Path to the shapefile that contains the nut2.
+        :type nut2_shapefile_path: str
+
+        :return: GeoDataFrame with the population distribution by destiny cell.
+        :rtype: GeoDataFrame
+        """
         spent_time = timeit.default_timer()
 
         # 1st Clip the raster
@@ -189,7 +372,7 @@ class SolventsSector(Sector):
         pop_shp['pop_percent'] = pop_shp['population'] / pop_shp['tot_pop']
         pop_shp.drop(columns=['tot_pop', 'population'], inplace=True)
 
-        # 5th Calculate percent by dest_cell
+        # 5th Calculate percent by destiny cell
         self.logger.write_log("\t\tCalculating population percentage on destiny resolution", message_level=3)
         pop_shp.to_crs(self.grid.shapefile.crs, inplace=True)
         pop_shp['src_inter_fraction'] = pop_shp.geometry.area
@@ -206,6 +389,24 @@ class SolventsSector(Sector):
         return popu_dist
 
     def get_land_use_proxy(self, land_use_raster, land_use_by_nut2_path, land_uses, nut2_shapefile_path):
+        """
+        Calculate the distribution based on the amount of land use.
+
+        :param land_use_raster: Path to the raster file that contains the land use information.
+        :type land_use_raster: str
+
+        :param land_use_by_nut2_path: Path to the CSV file that contains the amount of land use by nut2.
+        :type land_use_by_nut2_path: str
+
+        :param land_uses: List of land uses to take into account on the distribution.
+        :type land_uses: list
+
+        :param nut2_shapefile_path: Path to the shapefile that contains the nut2.
+        :type nut2_shapefile_path: str
+
+        :return: GeoDataFrame with the land use distribution for the selected land uses by destiny cell.
+        :rtype: GeoDataFrame
+        """
         spent_time = timeit.default_timer()
         # 1st Clip the raster
         self.logger.write_log("\t\tCreating clipped land use raster", message_level=3)
@@ -262,6 +463,25 @@ class SolventsSector(Sector):
 
     def get_point_shapefile_proxy(self, proxy_name, point_shapefile_path, point_sources_weight_by_nut2_path,
                                   nut2_shapefile_path):
+        """
+        Calculate the distribution for the solvent sub sector in the destiny grid cell.
+
+        :param proxy_name: Name of the proxy to be calculated.
+        :type proxy_name: str
+
+        :param point_shapefile_path: Path to the shapefile that contains all the point sources ant their weights.
+        :type point_shapefile_path: str
+
+        :param point_sources_weight_by_nut2_path: Path to the CSV file that contains the amount of weight by industry
+        and nut2.
+        :type point_sources_weight_by_nut2_path: str
+
+        :param nut2_shapefile_path: Path to the shapefile that contains the nut2.
+        :type nut2_shapefile_path: str
+
+        :return: GeoDataFrame with the distribution of the selected proxy on the destiny grid cells.
+        :rtype: GeoDataFrame
+        """
         spent_time = timeit.default_timer()
 
         point_shapefile = IoShapefile(self.comm).read_shapefile_parallel(point_shapefile_path)
@@ -295,19 +515,39 @@ class SolventsSector(Sector):
                             land_uses_nuts2_path, nut2_shapefile_path, point_sources_shapefile_path,
                             point_sources_weight_by_nut2_path):
         """
+        Calcualte (or read) the proxy shapefile.
 
-        :param population_raster_path:
-        :param population_nuts2_path:
-        :param land_uses_raster_path:
-        :param land_uses_nuts2_path:
-        :param nut2_shapefile_path:
-        :param point_sources_shapefile_path:
-        :param point_sources_weight_by_nut2_path:
-        :return:
-        :rtype: DataFrame
+        It will split the entire shapoefile into as many processors as selected to split the calculation part.
+
+        :param population_raster_path: Path to the raster file that contains the population information.
+        :type population_raster_path: str
+
+        :param population_nuts2_path: Path to the CSV file that contains the amount of population by nut2.
+        :type population_nuts2_path: str
+
+        :param land_uses_raster_path: Path to the raster file that contains the land use information.
+        :type land_uses_raster_path: str
+
+        :param land_uses_nuts2_path: Path to the CSV file that contains the amount of land use by nut2.
+        :type land_uses_nuts2_path: str
+
+        :param nut2_shapefile_path: Path to the shapefile that contains the nut2.
+        :type nut2_shapefile_path: str
+
+        :param point_sources_shapefile_path: Path to the shapefile that contains all the point sources ant their
+        weights.
+        :type point_sources_shapefile_path: str
+
+        :param point_sources_weight_by_nut2_path: Path to the CSV file that contains the amount of weight by industry
+        and nut2.
+        :type point_sources_weight_by_nut2_path: str
+
+        :return: GeoDataFrame with all the proxies
+        :rtype: GeoDataFrame
         """
         spent_time = timeit.default_timer()
-        self.logger.write_log("Getting proxies shapefile")
+
+        self.logger.write_log("Getting proxies shapefile", message_level=1)
         proxy_names_list = np.unique(self.proxies_map['proxy_name'])
         proxy_path = os.path.join(self.auxiliary_dir, 'solvents', 'proxy_distributions.shp')
         if not os.path.exists(proxy_path):
@@ -354,6 +594,15 @@ class SolventsSector(Sector):
         return proxies
 
     def calculate_hourly_emissions(self, yearly_emissions):
+        """
+        Disaggrate to hourly level the yearly emissions.
+
+        :param yearly_emissions: GeoDataFrame with the yearly emissions by destiny cell ID and snap code.
+        :type yearly_emissions: GeoDataFrame
+
+        :return: GeoDataFrame with the hourly distribution by FID, snap code and time step.
+        :rtype: GeoDataFrame
+        """
         def get_mf(df):
             month_factor = self.monthly_profiles.loc[df.name[1], df.name[0]]
 
@@ -373,6 +622,9 @@ class SolventsSector(Sector):
             df['HF'] = hour_factor
             return df.loc[:, ['HF']]
 
+        spent_time = timeit.default_timer()
+
+        self.logger.write_log('\tHourly disaggregation', message_level=2)
         emissions = self.add_dates(yearly_emissions, drop_utc=True)
 
         emissions['month'] = emissions['date'].dt.month
@@ -391,9 +643,19 @@ class SolventsSector(Sector):
         emissions.drop(columns=['temp_factor'], inplace=True)
         emissions.set_index(['FID', 'snap', 'tstep'], inplace=True)
 
+        self.logger.write_time_log('SolventsSector', 'calculate_hourly_emissions', timeit.default_timer() - spent_time)
         return emissions
 
     def distribute_yearly_emissions(self):
+        """
+        Calcualte the yearly emission by destiny grid cell and snap code.
+
+        :return: GeoDataFrame with the yearly emissions by snap code.
+        :rtype: GeoDataFrame
+        """
+        spent_time = timeit.default_timer()
+        self.logger.write_log('\t\tYearly distribution', message_level=2)
+
         yearly_emis = self.read_yearly_emissions(
             self.yearly_emissions_path, np.unique(self.proxy.index.get_level_values('nut_code')))
         year_nuts = np.unique(yearly_emis.index.get_level_values('nuts2_id'))
@@ -411,9 +673,7 @@ class SolventsSector(Sector):
             emis['P_week'] = snap_df['P_week']
             emis['P_hour'] = snap_df['P_hour']
             emis['P_spec'] = snap_df['P_spec']
-            # print(yearly_emis.index)
-            # print(yearly_emis.loc[(9, '060408'), 'nmvoc'])
-            # exit()
+
             emis['nmvoc'] = emis.apply(lambda row: yearly_emis.loc[(row['nut_code'], snap), 'nmvoc'] * row[
                 self.proxies_map.loc[snap, 'proxy_name']], axis=1)
 
@@ -421,10 +681,22 @@ class SolventsSector(Sector):
             emis_list.append(emis[['P_month', 'P_week', 'P_hour', 'P_spec', 'nmvoc', 'geometry']])
         emis = pd.concat(emis_list).sort_index()
         emis = emis[emis['nmvoc'] > 0]
-        # emis = IoShapefile(self.comm).balance(emis)
+
+        self.logger.write_time_log('SolventsSector', 'distribute_yearly_emissions', timeit.default_timer() - spent_time)
         return emis
 
     def speciate(self, dataframe, code='default'):
+        """
+        Spectiate the NMVOC pollutant into as many pollutants as the speciation map indicates.
+
+        :param dataframe: Emissions to be speciated.
+        :type dataframe: DataFrame
+
+        :param code: NOt used.
+
+        :return: Speciated emissions.
+        :rtype: DataFrame
+        """
 
         def calculate_new_pollutant(x, out_p):
             sys.stdout.flush()
@@ -432,19 +704,33 @@ class SolventsSector(Sector):
             x[out_p] = x['nmvoc'] * (profile['VOCtoTOG'] * profile[out_p])
             return x[[out_p]]
 
+        spent_time = timeit.default_timer()
+        self.logger.write_log('\tSpeciation emissions', message_level=2)
+
         new_dataframe = gpd.GeoDataFrame(index=dataframe.index, data=None, crs=dataframe.crs,
                                          geometry=dataframe.geometry)
         for out_pollutant in self.output_pollutants:
+            self.logger.write_log('\t\tSpeciating {0}'.format(out_pollutant), message_level=3)
             new_dataframe[out_pollutant] = dataframe.groupby('P_spec').apply(
                 lambda x: calculate_new_pollutant(x, out_pollutant))
         new_dataframe.reset_index(inplace=True)
 
         new_dataframe.drop(columns=['snap', 'geometry'], inplace=True)
         new_dataframe.set_index(['FID', 'tstep'], inplace=True)
+
+        self.logger.write_time_log('SolventsSector', 'speciate', timeit.default_timer() - spent_time)
         return new_dataframe
 
     def calculate_emissions(self):
-        # Distribute emissions first.
+        """
+        Main function to calculate the emissions.
+
+        :return: Solvent emissions.
+        :rtype: DataFrame
+        """
+        spent_time = timeit.default_timer()
+        self.logger.write_log('\tCalculating emissions')
+
         emissions = self.distribute_yearly_emissions()
         emissions = self.calculate_hourly_emissions(emissions)
         emissions = self.speciate(emissions)
@@ -452,4 +738,6 @@ class SolventsSector(Sector):
         emissions.reset_index(inplace=True)
         emissions['layer'] = 0
         emissions = emissions.groupby(['FID', 'layer', 'tstep']).sum()
+
+        self.logger.write_time_log('SolventsSector', 'calculate_emissions', timeit.default_timer() - spent_time)
         return emissions
-- 
GitLab


From 34c7dda774f6751a3ae1df649efaad00df6c323f Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Tue, 10 Sep 2019 18:23:35 +0200
Subject: [PATCH 21/22] PEP8 corrections

---
 hermesv3_bu/sectors/solvents_sector.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index cd2843f..a068a4b 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -277,13 +277,13 @@ class SolventsSector(Sector):
     def get_point_sources_weights_by_nut2(self, path, proxy_name):
         """
         Read the CSV file that contains the amount of weight by industry and nut2.
-        
+
         :param path: Path to the CSV file that contains the amount of weight by industry and nut2.
         :type path: str
-        
+
         :param proxy_name: Proxy to calculate.
         :type proxy_name: str
-        
+
         :return: Dataframe with the amount of weight by industry and nut2.
         :rtype: DataFrame
         """
-- 
GitLab


From 369949773b0b5da623f9c69f2f90a30ffa3cfafe Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Tue, 10 Sep 2019 18:24:20 +0200
Subject: [PATCH 22/22] PEP8 corrections

---
 hermesv3_bu/sectors/solvents_sector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
index a068a4b..a54cf9e 100755
--- a/hermesv3_bu/sectors/solvents_sector.py
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -284,7 +284,7 @@ class SolventsSector(Sector):
         :param proxy_name: Proxy to calculate.
         :type proxy_name: str
 
-        :return: Dataframe with the amount of weight by industry and nut2.
+        :return: DataFrame with the amount of weight by industry and nut2.
         :rtype: DataFrame
         """
         spent_time = timeit.default_timer()
-- 
GitLab