diff --git a/.codacy.yml b/.codacy.yml new file mode 100755 index 0000000000000000000000000000000000000000..bf12fc2d77140a852ff1b9197370766a0ecec7c3 --- /dev/null +++ b/.codacy.yml @@ -0,0 +1,21 @@ +# codacy configuration file + +--- + +engines: + coverage: + enabled: true + metrics: + enabled: true + duplication: + enabled: true + prospector: + enabled: true + pylint: + enabled: true + python_version: 2 + +exclude_paths: [ + 'doc/**', + 'data/**', +] diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000000000000000000000000000000000000..2849b446a4e038c0ba9ff3ff742a6c84c1aaffa4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.idea +.directory +*.pyc \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100755 index 0000000000000000000000000000000000000000..eeab6d0da1ca657ee271f322b238dc0aacc41e92 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,43 @@ +before_script: + - export GIT_SSL_NO_VERIFY=1 + - export PATH="$HOME/miniconda2/bin:$PATH" + +stages: + - prepare + - test + - report + - clean + +cache: + paths: + - test/report + +prepare: + stage: prepare + script: + - conda update conda + +test_python2: + stage: test + script: + - conda env update -f environment.yml -n hermesv3_gr python=2.7 + - source activate hermesv3_gr + - python run_test.py + # - pip install codacy-coverage --upgrade + # - python-codacy-coverage -r tests/report/python2/coverage.xml + +#test_python3: +# stage: test +# script: +# - git submodule sync --recursive +# - git submodule update --init --recursive +# - conda env update -f environment.yml -n earthdiagnostics3 python=3.6 +# - source activate earthdiagnostics3 +# - python run_test.py + +clean: + stage: clean + script: + - conda clean --all --yes + + diff --git a/.prospector.yml b/.prospector.yml new file mode 100755 index 0000000000000000000000000000000000000000..b9c6fa952133de951c9005edb1b2f01d9bcb851b --- /dev/null +++ b/.prospector.yml @@ -0,0 +1,36 @@ +# prospector configuration file + +--- + +output-format: grouped + +strictness: veryhigh +doc-warnings: true +test-warnings: true +member-warnings: false + +pyroma: + run: true + +pydocroma: + run: true + +pep8: + disable: [ + E501, # Line-length, already controlled by pylint + ] + +pep257: + run: true + # see http://pep257.readthedocs.io/en/latest/error_codes.html + disable: [ + # For short descriptions it makes sense not to end with a period: + D400, # First line should end with a period + # Disable because not part of PEP257 official convention: + D203, # 1 blank line required before class docstring + D212, # Multi-line docstring summary should start at the first line + D213, # Multi-line docstring summary should start at the second line + D404, # First word of the docstring should not be This + D107, # We are using numpy style and constructor should be documented in class docstring + D105, # Docstring in magic methods should not be required: we all now what they are for + ] diff --git a/CHANGELOG b/CHANGELOG old mode 100644 new mode 100755 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8238cd193e02a05ccf4207570db0d4333a2e8c3b --- a/CHANGELOG +++ b/CHANGELOG @@ -0,0 +1,21 @@ +0.0.0 + 2019/XX/XX + + First beta version: + + - Grid options: + 1. Regular Lat-Lon + 2. Rotated + 3. Mercator + 4. Lambert conformal conic + + - Clip options: + 1. Shapefile clip: path to a shapefile + 2. Custom clip: list of lat-lon points + 3. Default clip: unary union of the desired output grid + + - Sector Manager: + 1. Aviation sector + + - Writing options: + 1. 
Default writer \ No newline at end of file diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/conf/hermes.conf b/conf/hermes.conf new file mode 100755 index 0000000000000000000000000000000000000000..a2bd0a3acdeb2084405b8743f04a5f50f5c76c5e --- /dev/null +++ b/conf/hermes.conf @@ -0,0 +1,332 @@ +[GENERAL] +log_level = 3 +input_dir = /home/Earth/ctena/Models/hermesv3_bu_data +data_path = /esarchive/recon +output_dir = /scratch/Earth/HERMESv3_BU_OUT +output_name = HERMES_.nc +emission_summary = 0 +start_date = 2016/11/29 00:00:00 +# ----- end_date = start_date [DEFAULT] ----- +# end_date = 2010/01/01 00:00:00 +output_timestep_num = 25 +auxiliary_files_path = /scratch/Earth/HERMESv3_BU_aux/_ +erase_auxiliary_files = 0 + + +[DOMAIN] + +# domain_type=[lcc, rotated, mercator, regular] +domain_type = lcc +# output_type=[MONARCH, CMAQ, WRF_CHEM, DEFAULT] +output_model = DEFAULT +output_attributes = /writing/global_attributes_WRF-Chem.csv +vertical_description = /profiles/vertical/MONARCH_Global_48layers_vertical_description.csv +#vertical_description = /profiles/vertical/CMAQ_15layers_vertical_description.csv + +# if domain_type == rotated: + centre_lat = 51 + centre_lon = 10 + west_boundary = -35 + south_boundary = -27 + inc_rlat = 0.2 + inc_rlon = 0.2 + +# if domain_type == lcc: + + # CALIOPE + lat_1 = 37 + lat_2 = 43 + lon_0 = -3 + lat_0 = 40 + + # TEST + #nx = 30 + #ny = 30 + #inc_x = 10000 + #inc_y = 10000 + #x_0 = 253151.59375 + #y_0 = 43862.90625 + + # CATALUNYA + #nx = 278 + #ny = 298 + #inc_x = 1000 + #inc_y = 1000 + #x_0 = 253151.59375 + #y_0 = 43862.90625 + + # CATALUNYA test + nx = 28 + ny = 30 + inc_x = 10000 + inc_y = 10000 + x_0 = 253151.59375 + y_0 = 43862.90625 + + # EUROPA + #nx = 478 + #ny = 398 + #inc_x = 12000 + #inc_y = 12000 + #x_0 = -2131849.000 + #y_0 = -2073137.875 + + # IP + # nx = 397 + # ny = 397 + # inc_x = 4000 + # inc_y = 4000 + # x_0 = -807847.688 + # y_0 = -797137.125 + + # MAD + #nx = 146 + #ny = 158 + #inc_x = 1000 + #inc_y = 1000 + #x_0 = -142848.422 + #y_0 = -20137.891 + +# if domain_type == mercator: + #lat_ts = -1.5 + #lon_0 = -18 + #nx = 210 + #ny = 236 + #inc_x = 50000 + #inc_y = 50000 + #x_0 = -126017.5 + #y_0 = -5407460 + +# if domain_type == regular: + lat_orig = 41.1 + lon_orig = 1.8 + inc_lat = 0.1 + inc_lon = 0.1 + n_lat = 10 + n_lon = 10 + + +[CLIPPING] +# clipping = /Shapefiles/barcelona/barcelona_munic.shp +# clipping = 2.2 41.41, 2.15 41.41, 2.2 41.42, 2.15 41.42 +# clipping = 2.2 41.41, 2.19 41.41 + +#################################################################### +# SECTORS +#################################################################### +[SECTOR MANAGEMENT] +writing_processors = 1 + +aviation_processors = 0 +shipping_port_processors = 0 +livestock_processors = 0 +crop_operations_processors = 0 +crop_fertilizers_processors = 0 +agricultural_machinery_processors = 0 +residential_processors = 0 +recreational_boats_processors = 0 +point_sources_processors = 0 +traffic_processors = 0 +traffic_area_processors = 0 + + +[SHAPEFILES] +nut_shapefile_prov = /Shapefiles/Provinces/ES_Provinces.shp +nut_shapefile_ccaa = /Shapefiles/CCAA/ES_CCAA.shp +population_density_map = /jrc/ghsl/original_files/GHS_POP_GPW42015_GLOBE_R2015A_54009_1k_v1_0.tif + +[SPECIATION DATA] +speciation_map = /profiles/speciation/map_base.csv +molecular_weights = /profiles/speciation/MolecularWeights.csv + +[METEO PATHS] +temperature_hourly_files_path = 
/ecmwf/era5/original_files/reorder/1hourly/tas/ +temperature_daily_files_path = /ecmwf/era5/original_files/reorder/daily_mean/tas/ +wind_speed_daily_files_path = /ecmwf/era5/original_files/reorder/daily_mean/sfcWind/ +precipitation_files_path = /ecmwf/era5/original_files/reorder/1hourly/prlr/ + +temperature_4d_dir = /esarchive/exp/monarch/a1wd/regional/hourly/t +temperature_sfc_dir = /esarchive/exp/monarch/a1wd/regional/hourly/t2 +u_wind_speed_4d_dir = /esarchive/exp/monarch/a1wd/regional/hourly/u +v_wind_speed_4d_dir = /esarchive/exp/monarch/a1wd/regional/hourly/v +u10_wind_speed_dir = /esarchive/exp/monarch/a1wd/regional/hourly/u10 +v10_wind_speed_dir = /esarchive/exp/monarch/a1wd/regional/hourly/v10 +friction_velocity_dir = /esarchive/exp/monarch/a1wd/regional/hourly/ustar +pblh_dir = /esarchive/exp/monarch/a1wd/regional/hourly/mixed_layer_height +obukhov_length_dir = /esarchive/exp/monarch/a1wd/regional/hourly/rmol +layer_thickness_dir = /esarchive/exp/monarch/a1wd/regional/hourly/layer_thickness + + +[AVIATION SECTOR] +# With 'hc' is calculated 'nmvoc' and 'ch4' +aviation_source_pollutants = nox_no2, co, hc, so2, pm10, pm25, co2 +# airport_list = LEBL +# plane_list = +airport_shapefile_path = /aviation/Airports.shp +airport_runways_shapefile_path = /aviation/Runways.shp +airport_runways_corners_shapefile_path = /aviation/Runway_Corners.shp +airport_trajectories_shapefile_path = /aviation/Air_Trajectory.shp +airport_operations_path = /aviation/operations_2015.csv +planes_path = /aviation/planes.csv +airport_times_path = /aviation/times.csv +airport_ef_dir = /aviation/ef +aviation_weekly_profiles = /profiles/temporal/aviation/weekly_profiles.csv +aviation_hourly_profiles = /profiles/temporal/aviation/hourly_profiles.csv +aviation_speciation_profiles = /profiles/speciation/aviation/speciation_profiles_base.csv + + +[SHIPPING PORT SECTOR] +shipping_port_source_pollutants = nox_no2, pm10, pm25, co, so2, nmvoc, ch4, nh3, co2 +vessel_list = LC,DC,GC,RO,FE,CR,CO,TU,OT +port_list = ACO, ALC, ALI, ALM, ARI, ARR, AVI, ALG, BAR, BIL, CAB, CAD, CSA, CAR, CTG, CAS, CEU, HIE, FER, GAN, GIJ, HUE, IBI, LPM, LCR, MAH, MAL, MPO, MEL, MOT, PMA, PAS, PRO, PSM, SSG, SCP, SCT, SAG, SAL, SCI, SAN, SEV, TAR, TRG, VAL, VIG, VIL, ZFC +hoteling_shapefile_path = /Shapefiles/shipping_port/Areas_Hot_Puertos_and_BCN.shp +maneuvering_shapefile_path = /Shapefiles/shipping_port/Areas_Maneuv_Puertos_and_BCN.shp +shipping_port_ef_path = /shipping_port/ef/engines_fuel_EF.csv +shipping_port_engine_percent_path = /shipping_port/ef/ship_perc_engines.csv +shipping_port_tonnage_path = /shipping_port/ship_operations_GT_2015.csv +shipping_port_load_factor_path = /shipping_port/ship_phase_LF_time.csv +shipping_port_power_path = /shipping_port/ship_power_ME_AE.csv +shipping_port_monthly_profiles = /profiles/temporal/shipping_port/monthly_profiles.csv +shipping_port_weekly_profiles = /profiles/temporal/shipping_port/weekly_profiles.csv +shipping_port_hourly_profiles = /profiles/temporal/shipping_port/hourly_profiles.csv +shipping_port_speciation_profiles = /profiles/speciation/shipping_port/speciation_profiles_base.csv + +[LIVESTOCK] +livestock_source_pollutants = nox_no, nh3, nmvoc, pm10, pm25 +animal_list = cattle chicken goats pigs sheep +gridded_livestock = /fao/glwv3/original_files/_2010_glwv3.tif +correction_split_factors = /livestock/_2016.csv +denominator_yearly_factor_dir = /ecmwf/era5/yearly/dailyfactorlivestock +livestock_ef_files_dir = /livestock/ef +livestock_monthly_profiles = 
/profiles/temporal/livestock/monthly_profiles.csv +livestock_weekly_profiles = /profiles/temporal/livestock/weekly_profiles.csv +livestock_hourly_profiles = /profiles/temporal/livestock/hourly_profiles.csv +livestock_speciation_profiles = /profiles/speciation/livestock/speciation_profiles_base.csv + +[AGRICULTURAL] +land_uses_path = /ecmwf/clc/original_files/g250_clc12_v18_5a/g250_clc12_V18_5.tif +land_use_by_nut_path = /agriculture/land_use_ccaa.csv +crop_by_nut_path = /agriculture/crops_ha_2017.csv +crop_from_landuse_path = /agriculture/map_crops_landuse.csv + +[AGRICULTURAL_CROP_OPERATIONS] +crop_operations_source_pollutants = pm10, pm25 +crop_operations_list = barley, oats, rye, wheat +crop_operations_ef_files_dir = /agriculture/agriculture_crop_operations/ef +crop_operations_monthly_profiles = /profiles/temporal/agriculture_crop_operations/monthly_profiles.csv +crop_operations_weekly_profiles = /profiles/temporal/agriculture_crop_operations/weekly_profiles.csv +crop_operations_hourly_profiles = /profiles/temporal/agriculture_crop_operations/hourly_profiles.csv +crop_operations_speciation_profiles = /profiles/speciation/agriculture_crop_operations/speciation_profiles_base.csv + +[AGRICULTURAL_CROP_FERTILIZERS] +crop_fertilizers_source_pollutants = nh3 +crop_fertilizers_list = alfalfa, almond, apple, apricot, barley, cherry, cotton, fig, grape, lemonlime, maize, melonetc, oats, olive, orange, pea, peachetc, pear, potato, rice, rye, sunflower, tangetc, tomato, triticale, vetch, watermelon, wheat +cultivated_ratio = /agriculture/agriculture_fertilizers/fertilized_cultivated_ratio.csv +fertilizers_rate = /agriculture/agriculture_fertilizers/fertilizer_ratio.csv +crop_f_parameter = /agriculture/agriculture_fertilizers/ef/crops_f_parameter.csv +crop_f_fertilizers = /agriculture/agriculture_fertilizers/ef/f_fertilizers.csv +gridded_ph = /isric/soildgrids250m/original_files/PHIHOX_M_sl1_250m.tif +gridded_cec = /isric/soildgrids250m/original_files/CECSOL_M_sl1_250m.tif +fertilizers_denominator_yearly_factor_path = /ecmwf/era5/yearly/dailyfactorfertilizer/_.nc +crop_calendar = /profiles/temporal/agriculture_fertilizers/crop_calendar.csv +crop_fertilizers_hourly_profiles = /profiles/temporal/agriculture_fertilizers/hourly_profiles.csv +crop_fertilizers_speciation_profiles = /profiles/speciation/agriculture_fertilizers/speciation_profiles_base.csv +crop_growing_degree_day_path = /ecmwf/era5/yearly/growingdegreeday/_crop_.nc + +[AGRICULTURAL_CROP_MACHINERY] +crop_machinery_source_pollutants = nox_no2,co,nmvoc,nh3,pm10,pm25,ch4,n2o,so2,co2 +crop_machinery_list = barley, oats, rye, wheat +machinery_list = tractors, harvesters, rotavators +crop_machinery_deterioration_factor_path = /agriculture/agricultural_machinery/DF_2015.csv +crop_machinery_load_factor_path = /agriculture/agricultural_machinery/LF_2015.csv +crop_machinery_vehicle_ratio_path = /agriculture/agricultural_machinery/Antique_2015.csv +crop_machinery_vehicle_units_path = /agriculture/agricultural_machinery/Units_2015.csv +crop_machinery_vehicle_workhours_path = /agriculture/agricultural_machinery/Workhours_2015.csv +crop_machinery_vehicle_power_path = /agriculture/agricultural_machinery/Power_2015.csv +crop_machinery_ef_path = /agriculture/agricultural_machinery/ef.csv +crop_machinery_monthly_profiles = /profiles/temporal/agricultural_machinery/monthly_profiles.csv +crop_machinery_weekly_profiles = /profiles/temporal/agricultural_machinery/weekly_profiles.csv +crop_machinery_hourly_profiles = 
/profiles/temporal/agricultural_machinery/hourly_profiles.csv +crop_machinery_speciation_profiles = /profiles/speciation/agricultural_machinery/speciation_profiles_base.csv +crop_machinery_by_nut = /agriculture/agricultural_machinery/crops_ha_prov_2017.csv + +[RESIDENTIAL] +fuel_list = B_res, B_com +residential_source_pollutants = nox_no2, so2, co, nh3, pm10, pm25, nmvoc +population_type_map = /jrc/ghsl/original_files/GHS_SMOD_POP2015_GLOBE_R2016A_54009_1k_v1_0.tif +population_type_by_ccaa = /residential/pop_type_ccaa.csv +population_type_by_prov = /residential/pop_type_prov.csv +energy_consumption_by_prov = /residential/energy_consumption_nuts3.csv +energy_consumption_by_ccaa = /residential/energy_consumption_nuts2.csv +residential_spatial_proxies = /residential/spatial_proxies.csv +residential_ef_files_path = /residential/ef/ef.csv +residential_heating_degree_day_path = /ecmwf/era5/yearly/heatingdegreeday/hdd_.nc +residential_hourly_profiles = /profiles/temporal/residential/hourly_profiles.csv +residential_speciation_profiles = /profiles/speciation/residential/speciation_profiles_base.csv + +[RECREATIONAL_BOATS} +recreational_boats_source_pollutants = nox_no2,so2,nmvoc,co,nh3,pm10,pm25,co2,ch4 +recreational_boats_list = YB_001,YB_002,SB_001,SB_002,SP_001,SP_002,OB_001,OB_002,WS_001,WS_002,YB_003,SB_003,SP_004,SP_005,OB_002,WS_003,MB_001,MB_002,MB_003,MB_004,MB_005,MB_006,MS_001,MS_002,SB_004,SB_005 +recreational_boats_density_map = /recreational_boats/recreation_boats_area.tif +recreational_boats_by_type = /recreational_boats/recreation_boats.csv +recreational_boats_ef_path = /recreational_boats/ef_recreation_boats.csv +recreational_boats_monthly_profiles = /profiles/temporal/recreational_boats/monthly_profiles.csv +recreational_boats_weekly_profiles = /profiles/temporal/recreational_boats/weekly_profiles.csv +recreational_boats_hourly_profiles = /profiles/temporal/recreational_boats/hourly_profiles.csv +recreational_boats_speciation_profiles = /profiles/speciation/recreational_boats/speciation_profiles_base.csv + +[POINT SOURCES] +point_source_pollutants = nox_no2,nmvoc,so2,co,nh3,pm10,pm25,ch4,n2o,co2 +plume_rise = True +# point_source_snaps = 09 +point_source_catalog = /point_sources/Maestra_Focos_SNAP01030409_2015_plume_rise.csv +point_source_monthly_profiles = /profiles/temporal/point_sources/monthly_profiles.csv +point_source_weekly_profiles = /profiles/temporal/point_sources/weekly_profiles.csv +point_source_hourly_profiles = /profiles/temporal/point_sources/hourly_profiles.csv +point_source_speciation_profiles = /profiles/speciation/point_sources/speciation_profiles_base.csv +point_source_measured_emissions = /point_sources/measured_emis/_measured_emissions.csv + +[TRAFFIC SECTOR] +do_hot = 1 +do_cold = 1 +do_tyre_wear = 1 +do_brake_wear = 1 +do_road_wear = 1 +do_resuspension = 1 +resuspension_correction = 1 +write_rline = 0 + +traffic_pollutants = nox_no2, nh3, co, so2, pm, voc, ch4 +vehicle_types = PCD_13 PCD_14 PCD_15 PCG_25 PCG_26 PCG_27 +# load = [0, 0.5, 1] +load = 0.5 +road_link_path = /traffic/road_links/2015/road_links_2015.shp +fleet_compo_path = /traffic/fleet_compo/2015/fleet_compo_2015.csv +traffic_ef_path = /traffic/ef +traffic_speed_hourly_path = /profiles/temporal/traffic/speed_hourly.csv +traffic_monthly_profiles = /profiles/temporal/traffic/aadt_m_mn.csv +traffic_weekly_profiles = /profiles/temporal/traffic/aadt_week.csv +traffic_hourly_profiles_mean = /profiles/temporal/traffic/aadt_h_mn.csv +traffic_hourly_profiles_weekday = 
/profiles/temporal/traffic/aadt_h_wd.csv +traffic_hourly_profiles_saturday = /profiles/temporal/traffic/aadt_h_sat.csv +traffic_hourly_profiles_sunday = /profiles/temporal/traffic/aadt_h_sun.csv +traffic_speciation_profile_hot_cold = /profiles/speciation/traffic/hot_cold_cmaq_cb05_aero5.csv +traffic_speciation_profile_tyre = /profiles/speciation/traffic/tyre_cmaq_cb05_aero5.csv +traffic_speciation_profile_road = /profiles/speciation/traffic/road_cmaq_cb05_aero5.csv +traffic_speciation_profile_brake = /profiles/speciation/traffic/brake_cmaq_cb05_aero5.csv +traffic_speciation_profile_resuspension = /profiles/speciation/traffic/resuspension_cmaq_cb05_aero5.csv + +[TRAFFIC AREA SECTOR] +traffic_area_pollutants = nox_no2,nmvoc,so2,co,nh3,pm10,pm25 +do_evaporative = 1 +traffic_area_gas_path = /traffic_area/gasoline_vehicles_provinces_2015.csv +popullation_by_municipality = /traffic_area/population_by_mun.csv +traffic_area_speciation_profiles_evaporative = /profiles/speciation/traffic_area/evaporative_base.csv +traffic_area_evaporative_ef_file = /traffic_area/ef/evaporative_nmvoc.csv +do_small_cities = 1 +traffic_area_small_cities_path = /Shapefiles/small_cities/small_cities.shp +traffic_area_speciation_profiles_small_cities = /profiles/speciation/traffic_area/small_cities_base.csv +traffic_area_small_cities_ef_file = /traffic_area/ef/small_cities.csv +small_cities_monthly_profile = /profiles/temporal/traffic_area/small_cities_monthly_profiles.csv +small_cities_weekly_profile = /profiles/temporal/traffic_area/small_cities_weekly_profiles.csv +small_cities_hourly_profile = /profiles/temporal/traffic_area/small_cities_hourly_profiles.csv diff --git a/environment.yml b/environment.yml new file mode 100755 index 0000000000000000000000000000000000000000..fc71b53fb3264847339c3d43aba843e8614b2232 --- /dev/null +++ b/environment.yml @@ -0,0 +1,27 @@ +--- + +name: hermesv3_bu + +channels: + - conda-forge + - anaconda + +dependencies: + - python = 2 + - numpy + - netcdf4 >= 1.3.1 + - python-cdo >= 1.3.6 + - geopandas + - pyproj + - configargparse + - cf_units >= 1.1.3 + - pytz + - timezonefinder + - mpi4py + # Testing + - pytest + - pytest-cov + - pycodestyle + - shapely + - pip: + - holidays diff --git a/hermesv3_bu/__init__.py b/hermesv3_bu/__init__.py old mode 100644 new mode 100755 index 3aa0d7b3c4a82ed4ae8d4ffa80115156f9785f46..6c8e6b979c5f58121ac7ee2d9e024749da3a8ce1 --- a/hermesv3_bu/__init__.py +++ b/hermesv3_bu/__init__.py @@ -1 +1 @@ -__version__ = "0.0.0" \ No newline at end of file +__version__ = "0.0.0" diff --git a/hermesv3_bu/modules/__init__.py b/hermesv3_bu/clipping/__init__.py old mode 100644 new mode 100755 similarity index 100% rename from hermesv3_bu/modules/__init__.py rename to hermesv3_bu/clipping/__init__.py diff --git a/hermesv3_bu/clipping/clip.py b/hermesv3_bu/clipping/clip.py new file mode 100755 index 0000000000000000000000000000000000000000..5b2ebb6598dd1e964c49bcf340817d948e4bbd5f --- /dev/null +++ b/hermesv3_bu/clipping/clip.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python + +import os +import sys +import timeit +from hermesv3_bu.logger.log import Log + + +def select_clip(comm, logger, auxiliary_path, clipping, grid): + """ + Create and initialise the clip. + + :param comm: MPI communicator. + + :param logger: Logger + :type logger: Log + + :param auxiliary_path: Path to the folder to store all the needed auxiliary files. + :type auxiliary_path: str + + :param clipping: String (or None) with the path to the shapefile clip or a list of points to make the clip. 
+ :type clipping: str + + :param grid: Desired output grid + :type grid: Grid + + :return: Clip + :rtype: Clip + """ + spent_time = timeit.default_timer() + if comm.Get_rank() == 0: + if clipping is None: + from hermesv3_bu.clipping.default_clip import DefaultClip + clip = DefaultClip(logger, auxiliary_path, grid) + elif clipping[0] == os.path.sep: + from hermesv3_bu.clipping.shapefile_clip import ShapefileClip + clip = ShapefileClip(logger, auxiliary_path, clipping) + else: + from hermesv3_bu.clipping.custom_clip import CustomClip + clip = CustomClip(logger, auxiliary_path, clipping) + else: + clip = None + + clip = comm.bcast(clip, root=0) + + logger.write_time_log('Clip', 'select_clip', timeit.default_timer() - spent_time) + return clip + + +class Clip(object): + + def __init__(self, logger, auxiliary_path): + spent_time = timeit.default_timer() + self.logger = logger + self.shapefile = None + self.shapefile_path = os.path.join(auxiliary_path, 'clip', 'clip.shp') + + self.logger.write_time_log('Clip', '__init__', timeit.default_timer() - spent_time) diff --git a/hermesv3_bu/clipping/custom_clip.py b/hermesv3_bu/clipping/custom_clip.py new file mode 100755 index 0000000000000000000000000000000000000000..f6f79b27ccb93f669bddfce8e689c19eb01a38a0 --- /dev/null +++ b/hermesv3_bu/clipping/custom_clip.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +import sys +import os +import timeit +import geopandas as gpd +from hermesv3_bu.clipping.clip import Clip +from hermesv3_bu.logger.log import Log + + +class CustomClip(Clip): + def __init__(self, logger, auxiliary_path, points_str): + """ + Initialise the Custom Clip class + + :param logger: Logger + :type logger: Log + + :param auxiliary_path: Path to the auxiliary directory. + :type auxiliary_path: str + + :param points_str: List of points in string format. + :type points_str: str + """ + spent_time = timeit.default_timer() + logger.write_log('Custom clip selected') + super(CustomClip, self).__init__(logger, auxiliary_path) + self.clip_type = 'Custom clip' + self.shapefile = self.create_clip(points_str) + self.logger.write_time_log('CustomClip', '__init__', timeit.default_timer() - spent_time) + + def create_clip(self, points_str): + """ + Create a clip using the unary union of the desired output grid. 
+ + :param points_str: List of points (lat, lon) + :type points_str: str + + :return: Clip shapefile + :rtype: GeoDataFrame + """ + import re + from shapely.geometry import Point, Polygon + spent_time = timeit.default_timer() + if not os.path.exists(self.shapefile_path): + if not os.path.exists(os.path.dirname(self.shapefile_path)): + os.makedirs(os.path.dirname(self.shapefile_path)) + str_clip = re.split(' , | ,|, |,', points_str) + lon_list = [] + lat_list = [] + for components in str_clip: + components = re.split(' ', components) + lon_list.append(float(components[0])) + lat_list.append(float(components[1])) + + if not ((lon_list[0] == lon_list[-1]) and (lat_list[0] == lat_list[-1])): + lon_list.append(lon_list[0]) + lat_list.append(lat_list[0]) + + clip = gpd.GeoDataFrame( + geometry=[Polygon([[p.x, p.y] for p in [Point(xy) for xy in zip(lon_list, lat_list)]])], + crs={'init': 'epsg:4326'}) + + clip.to_file(self.shapefile_path) + else: + clip = gpd.read_file(self.shapefile_path) + self.logger.write_log("\tClip created at '{0}'".format(self.shapefile_path), 3) + self.logger.write_time_log('CustomClip', 'create_clip', timeit.default_timer() - spent_time) + return clip diff --git a/hermesv3_bu/clipping/default_clip.py b/hermesv3_bu/clipping/default_clip.py new file mode 100755 index 0000000000000000000000000000000000000000..f05bda8cc6ca35758b495f5b0f19a2d84770b013 --- /dev/null +++ b/hermesv3_bu/clipping/default_clip.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +import sys +import os +import timeit +import geopandas as gpd +from hermesv3_bu.clipping.clip import Clip +from hermesv3_bu.logger.log import Log + + +class DefaultClip(Clip): + def __init__(self, logger, auxiliary_path, grid): + """ + Initialise the Custom Clip class + + :param logger: Logger + :type logger: Log + + :param auxiliary_path: Path to the auxiliary directory. + :type auxiliary_path: str + + :param grid: Grid object + :type grid: Grid + """ + spent_time = timeit.default_timer() + logger.write_log('Default clip selected') + super(DefaultClip, self).__init__(logger, auxiliary_path) + self.clip_type = 'Default clip' + self.shapefile = self.create_clip(grid) + self.logger.write_time_log('DefaultClip', '__init__', timeit.default_timer() - spent_time) + + def create_clip(self, grid): + """ + Create a clip using the unary union of the desired output grid. 
+ + :param grid: Desired output grid + :type grid: Grid + + :return: Clip shapefile + :rtype: GeoDataFrame + """ + spent_time = timeit.default_timer() + if not os.path.exists(self.shapefile_path): + if not os.path.exists(os.path.dirname(self.shapefile_path)): + os.makedirs(os.path.dirname(self.shapefile_path)) + + clip = gpd.GeoDataFrame(geometry=[grid.shapefile.unary_union], crs=grid.shapefile.crs) + + clip.to_file(self.shapefile_path) + else: + clip = gpd.read_file(self.shapefile_path) + self.logger.write_log("\tClip created at '{0}'".format(self.shapefile_path), 3) + self.logger.write_time_log('DefaultClip', 'create_clip', timeit.default_timer() - spent_time) + return clip diff --git a/hermesv3_bu/clipping/shapefile_clip.py b/hermesv3_bu/clipping/shapefile_clip.py new file mode 100755 index 0000000000000000000000000000000000000000..a0f1ec2d4f84339838fc7e32d9b3619b1790e164 --- /dev/null +++ b/hermesv3_bu/clipping/shapefile_clip.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python + +import sys +import os +import timeit +import geopandas as gpd +from hermesv3_bu.clipping.clip import Clip +from hermesv3_bu.logger.log import Log + + +class ShapefileClip(Clip): + def __init__(self, logger, auxiliary_path, clip_input_path): + """ + Initialise the Shapefile Clip class + + :param logger: Logger + :type logger: Log + + :param auxiliary_path: Path to the auxiliary directory. + :type auxiliary_path: str + + :param clip_input_path: Path to the shapefile. + :type clip_input_path: str + """ + spent_time = timeit.default_timer() + logger.write_log('Shapefile clip selected') + super(ShapefileClip, self).__init__(logger, auxiliary_path) + self.clip_type = 'Shapefile clip' + self.shapefile = self.create_clip(clip_input_path) + self.logger.write_time_log('ShapefileClip', '__init__', timeit.default_timer() - spent_time) + + def create_clip(self, clip_path): + """ + Create a clip using the unary union of the desired output grid. + + :param clip_path: Path to the shapefile that contains the clip + :type clip_path: str + + :return: Clip shapefile + :rtype: GeoDataFrame + """ + spent_time = timeit.default_timer() + if not os.path.exists(self.shapefile_path): + if os.path.exists(clip_path): + if not os.path.exists(os.path.dirname(self.shapefile_path)): + os.makedirs(os.path.dirname(self.shapefile_path)) + clip = gpd.read_file(clip_path) + clip = gpd.GeoDataFrame(geometry=[clip.unary_union], crs=clip.crs) + clip.to_file(self.shapefile_path) + else: + raise IOError(" Clip shapefile {0} not found.") + else: + clip = gpd.read_file(self.shapefile_path) + self.logger.write_log("\tClip created at '{0}'".format(self.shapefile_path), 3) + self.logger.write_time_log('ShapefileClip', 'create_clip', timeit.default_timer() - spent_time) + return clip diff --git a/hermesv3_bu/config/__init__.py b/hermesv3_bu/config/__init__.py old mode 100644 new mode 100755 diff --git a/hermesv3_bu/config/config.py b/hermesv3_bu/config/config.py old mode 100644 new mode 100755 index 36d9db4dffb2971102fca7702ca350df2ab74aa0..043ee0e4acc824eb254d1593529bcbe6a1e5eb85 --- a/hermesv3_bu/config/config.py +++ b/hermesv3_bu/config/config.py @@ -2,46 +2,57 @@ # Copyright 2018 Earth Sciences Department, BSC-CNS # -# This file is part of HERMESv3_GR. +# This file is part of HERMESv3_BU. 
# -# HERMESv3_GR is free software: you can redistribute it and/or modify +# HERMESv3_BU is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# HERMESv3_GR is distributed in the hope that it will be useful, +# HERMESv3_BU is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with HERMESv3_GR. If not, see . +# along with HERMESv3_BU. If not, see . from configargparse import ArgParser +import os +from mpi4py import MPI class Config(ArgParser): """ - Initialization of the arguments that the parser can handle. + Configuration arguments class. """ - def __init__(self): + def __init__(self, new_date=None): + """ + Read and parse all the arguments. + + :param new_date: Starting date for simulation loop day. + :type new_date: datetime.datetime + """ + self.new_date = new_date + super(Config, self).__init__() - self.options = self.read_options() + self.arguments = self.read_arguments() - def read_options(self): + def read_arguments(self): """ - Reads all the options from command line or from the configuration file. + Reads all the arguments from command line or from the configuration file. The value of an argument given by command line has high priority that the one that appear in the configuration file. :return: Arguments already parsed. :rtype: Namespace """ - # p = ArgParser(default_config_files=['/home/Earth/mguevara/HERMES/HERMESv3/IN/conf/hermes.conf']) + from shutil import rmtree + p = ArgParser() p.add_argument('-c', '--my-config', required=False, is_config_file=True, help='Path to the configuration file.') - # TODO Detallar mas que significan 1, 2 y 3 los log_level + # ===== GENERAL ===== p.add_argument('--log_level', required=True, help='Level of detail of the running process information.', type=int, choices=[1, 2, 3]) @@ -51,152 +62,436 @@ class Config(ArgParser): p.add_argument('--output_name', required=True, help="Name of the output file. You can add the string '' that will be substitute by the " + "starting date of the simulation day.") + p.add_argument('--emission_summary', required=False, type=str, default='False', + help='Indicates if you want to create the emission summary files.') p.add_argument('--start_date', required=True, help='Starting Date to simulate (UTC)') p.add_argument('--end_date', required=False, default=None, help='If you want to simulate more than one day you have to specify the ending date of ' + 'simulation in this parameter. 
If it is not set end_date = start_date.') - - p.add_argument('--output_timestep_type', required=True, help='Type of timestep.', - type=str, choices=['hourly', 'daily', 'monthly', 'yearly']) p.add_argument('--output_timestep_num', required=True, help='Number of timesteps to simulate.', type=int) - p.add_argument('--output_timestep_freq', required=True, help='Frequency between timesteps.', type=int) + p.add_argument('--auxiliary_files_path', required=True, + help='Path to the directory where the necessary auxiliary files will be created if them are ' + + 'not created yet.') + p.add_argument('--erase_auxiliary_files', required=False, default='False', type=str, + help='Indicates if you want to start from scratch removing the auxiliary files already created.') + p.add_argument('--molecular_weights', required=True, + help='Path to the file that contains the molecular weights of the input pollutants.') + # ===== DOMAIN ===== p.add_argument('--output_model', required=True, help='Name of the output model.', - choices=['MONARCH', 'CMAQ', 'WRF_CHEM']) + choices=['MONARCH', 'CMAQ', 'WRF_CHEM', 'DEFAULT']) + p.add_argument('--writing_processors', required=False, type=int, + help='Number of processors dedicated to write. ' + + 'Maximum number accepted is the number of rows of the destiny grid.') p.add_argument('--output_attributes', required=False, help='Path to the file that contains the global attributes.') p.add_argument('--domain_type', required=True, help='Type of domain to simulate.', - choices=['global', 'lcc', 'rotated', 'mercator']) - p.add_argument('--auxiliar_files_path', required=True, - help='Path to the directory where the necessary auxiliary files will be created if them are ' + - 'not created yet.') + choices=['lcc', 'rotated', 'mercator', 'regular']) p.add_argument('--vertical_description', required=True, help='Path to the file that contains the vertical description of the desired output.') - # Global options - p.add_argument('--inc_lat', required=False, help='Latitude resolution for a global domain.', type=float) - p.add_argument('--inc_lon', required=False, help='Longitude resolution for a global domain.', type=float) - # Rotated options - p.add_argument('--centre_lat', required=False, + p.add_argument('--centre_lat', required=False, type=float, help='Central geographic latitude of grid (non-rotated degrees). Corresponds to the TPH0D ' + - 'parameter in NMMB-MONARCH.', type=float) - p.add_argument('--centre_lon', required=False, + 'parameter in NMMB-MONARCH.') + p.add_argument('--centre_lon', required=False, type=float, help='Central geographic longitude of grid (non-rotated degrees, positive east). Corresponds ' + - 'to the TLM0D parameter in NMMB-MONARCH.', type=float) - p.add_argument('--west_boundary', required=False, + 'to the TLM0D parameter in NMMB-MONARCH.') + p.add_argument('--west_boundary', required=False, type=float, help="Grid's western boundary from center point (rotated degrees). Corresponds to the WBD " + - "parameter in NMMB-MONARCH.", type=float) - p.add_argument('--south_boundary', required=False, + "parameter in NMMB-MONARCH.") + p.add_argument('--south_boundary', required=False, type=float, help="Grid's southern boundary from center point (rotated degrees). Corresponds to the SBD " + - "parameter in NMMB-MONARCH.", type=float) - p.add_argument('--inc_rlat', required=False, + "parameter in NMMB-MONARCH.") + p.add_argument('--inc_rlat', required=False, type=float, help='Latitudinal grid resolution (rotated degrees). 
Corresponds to the DPHD parameter in ' + - 'NMMB-MONARCH.', type=float) - p.add_argument('--inc_rlon', required=False, + 'NMMB-MONARCH.') + p.add_argument('--inc_rlon', required=False, type=float, help='Longitudinal grid resolution (rotated degrees). Corresponds to the DLMD parameter ' + - 'in NMMB-MONARCH.', type=float) + 'in NMMB-MONARCH.') # Lambert conformal conic options - p.add_argument('--lat_1', required=False, - help='Standard parallel 1 (in deg). Corresponds to the P_ALP parameter of the GRIDDESC file.', - type=float) - p.add_argument('--lat_2', required=False, - help='Standard parallel 2 (in deg). Corresponds to the P_BET parameter of the GRIDDESC file.', - type=float) - p.add_argument('--lon_0', required=False, + p.add_argument('--lat_1', required=False, type=float, + help='Standard parallel 1 (in deg). Corresponds to the P_ALP parameter of the GRIDDESC file.') + p.add_argument('--lat_2', required=False, type=float, + help='Standard parallel 2 (in deg). Corresponds to the P_BET parameter of the GRIDDESC file.') + p.add_argument('--lon_0', required=False, type=float, help='Longitude of the central meridian (degrees). Corresponds to the P_GAM parameter of ' + - 'the GRIDDESC file.', type=float) - p.add_argument('--lat_0', required=False, + 'the GRIDDESC file.') + p.add_argument('--lat_0', required=False, type=float, help='Latitude of the origin of the projection (degrees). Corresponds to the Y_CENT ' + - 'parameter of the GRIDDESC file.', type=float) - p.add_argument('--nx', required=False, - help='Number of grid columns. Corresponds to the NCOLS parameter of the GRIDDESC file.', - type=float) - p.add_argument('--ny', required=False, - help='Number of grid rows. Corresponds to the NROWS parameter of the GRIDDESC file.', - type=float) - p.add_argument('--inc_x', required=False, + 'parameter of the GRIDDESC file.') + p.add_argument('--nx', required=False, type=int, + help='Number of grid columns. Corresponds to the NCOLS parameter of the GRIDDESC file.') + p.add_argument('--ny', required=False, type=int, + help='Number of grid rows. Corresponds to the NROWS parameter of the GRIDDESC file.') + p.add_argument('--inc_x', required=False, type=float, help='X-coordinate cell dimension (meters). Corresponds to the XCELL parameter of the ' + - 'GRIDDESC file.', type=float) - p.add_argument('--inc_y', required=False, + 'GRIDDESC file.') + p.add_argument('--inc_y', required=False, type=float, help='Y-coordinate cell dimension (meters). Corresponds to the YCELL parameter of the ' + - 'GRIDDESC file.', type=float) - p.add_argument('--x_0', required=False, + 'GRIDDESC file.') + p.add_argument('--x_0', required=False, type=float, help='X-coordinate origin of grid (meters). Corresponds to the XORIG parameter of the ' + - 'GRIDDESC file.', type=float) - p.add_argument('--y_0', required=False, + 'GRIDDESC file.') + p.add_argument('--y_0', required=False, type=float, help='Y-coordinate origin of grid (meters). 
Corresponds to the YORIG parameter of the ' + - 'GRIDDESC file.', type=float) + 'GRIDDESC file.') # Mercator - p.add_argument('--lat_ts', required=False, help='...', type=float) - - p.add_argument('--cross_table', required=True, - help='Path to the file that contains the information of the datasets to use.') - p.add_argument('--p_vertical', required=True, - help='Path to the file that contains all the needed vertical profiles.') - p.add_argument('--p_month', required=True, - help='Path to the file that contains all the needed monthly profiles.') - p.add_argument('--p_day', required=True, help='Path to the file that contains all the needed daily profiles.') - p.add_argument('--p_hour', required=True, help='Path to the file that contains all the needed hourly profiles.') - p.add_argument('--p_speciation', required=True, - help='Path to the file that contains all the needed speciation profiles.') - p.add_argument('--molecular_weights', required=True, - help='Path to the file that contains the molecular weights of the input pollutants.') - p.add_argument('--world_info', required=True, - help='Path to the file that contains the world information like timezones, ISO codes, ...') - - options = p.parse_args() - for item in vars(options): - is_str = False - exec ("is_str = str == type(options.{0})".format(item)) + p.add_argument('--lat_ts', required=False, type=float, help='...') + + # Regular lat-lon options: + p.add_argument('--lat_orig', required=False, type=float, help='Latitude of the corner of the first cell.') + p.add_argument('--lon_orig', required=False, type=float, help='Longitude of the corner of the first cell.') + p.add_argument('--n_lat', required=False, type=int, help='Number of latitude elements.') + p.add_argument('--n_lon', required=False, type=int, help='Number of longitude elements.') + p.add_argument('--inc_lat', required=False, type=float, help='Latitude grid resolution.') + p.add_argument('--inc_lon', required=False, type=float, help='Longitude grid resolution.') + + # ===== SECTOR SELECTION ===== + p.add_argument('--traffic_processors', required=True, type=int) + p.add_argument('--traffic_area_processors', required=True, type=int) + p.add_argument('--aviation_processors', required=True, type=int) + p.add_argument('--point_sources_processors', required=True, type=int) + p.add_argument('--recreational_boats_processors', required=True, type=int) + p.add_argument('--shipping_port_processors', required=True, type=int) + p.add_argument('--residential_processors', required=True, type=int) + p.add_argument('--livestock_processors', required=True, type=int) + p.add_argument('--crop_operations_processors', required=True, type=int) + p.add_argument('--crop_fertilizers_processors', required=True, type=int) + p.add_argument('--agricultural_machinery_processors', required=True, type=int) + + p.add_argument('--speciation_map', required=False, help='...') + + # ===== SHAPEFILES ===== + p.add_argument('--nut_shapefile_prov', required=False, type=str, default='True') + p.add_argument('--nut_shapefile_ccaa', required=False, type=str, default='True') + + p.add_argument('--clipping', required=False, type=str, default=None, + help='To clip the domain into an specific zone. 
' + + 'It can be a shapefile path, a list of points to make a polygon or nothing to use ' + + 'the default clip: domain extension') + + # ===== METEO PATHS ===== + p.add_argument('--temperature_hourly_files_path', required=False, type=str, default='True') + p.add_argument('--temperature_daily_files_path', required=False, type=str, default='True') + p.add_argument('--wind_speed_daily_files_path', required=False, type=str, default='True') + p.add_argument('--precipitation_files_path', required=False, type=str, default='True') + p.add_argument('--temperature_4d_dir', required=False, type=str, default='True') + p.add_argument('--temperature_sfc_dir', required=False, type=str, default='True') + p.add_argument('--u_wind_speed_4d_dir', required=False, type=str, default='True') + p.add_argument('--v_wind_speed_4d_dir', required=False, type=str, default='True') + p.add_argument('--u10_wind_speed_dir', required=False, type=str, default='True') + p.add_argument('--v10_wind_speed_dir', required=False, type=str, default='True') + p.add_argument('--friction_velocity_dir', required=False, type=str, default='True') + p.add_argument('--pblh_dir', required=False, type=str, default='True') + p.add_argument('--obukhov_length_dir', required=False, type=str, default='True') + p.add_argument('--layer_thickness_dir', required=False, type=str, default='True') + + # ***** AVIATION SECTOR ***** + p.add_argument('--aviation_source_pollutants', required=False, help='...') + p.add_argument('--airport_list', required=False, help='...') + p.add_argument('--plane_list', required=False, help='...') + p.add_argument('--airport_shapefile_path', required=False, help='...') + p.add_argument('--airport_runways_shapefile_path', required=False, help='...') + p.add_argument('--airport_runways_corners_shapefile_path', required=False, help='...') + p.add_argument('--airport_trajectories_shapefile_path', required=False, help='...') + p.add_argument('--airport_operations_path', required=False, help='...') + p.add_argument('--planes_path', required=False, help='...') + p.add_argument('--airport_times_path', required=False, help='...') + p.add_argument('--airport_ef_dir', required=False, help='...') + p.add_argument('--aviation_weekly_profiles', required=False, help='...') + p.add_argument('--aviation_hourly_profiles', required=False, help='...') + p.add_argument('--aviation_speciation_profiles', required=False, help='...') + + # ***** SHIPPING PORT SECTOR ***** + p.add_argument('--shipping_port_source_pollutants', required=False, help='...') + p.add_argument('--vessel_list', required=False, help='...') + p.add_argument('--port_list', required=False, help='...') + p.add_argument('--hoteling_shapefile_path', required=False, help='...') + p.add_argument('--maneuvering_shapefile_path', required=False, help='...') + p.add_argument('--shipping_port_ef_path', required=False, help='...') + p.add_argument('--shipping_port_engine_percent_path', required=False, help='...') + p.add_argument('--shipping_port_tonnage_path', required=False, help='...') + p.add_argument('--shipping_port_load_factor_path', required=False, help='...') + p.add_argument('--shipping_port_power_path', required=False, help='...') + p.add_argument('--shipping_port_monthly_profiles', required=False, help='...') + p.add_argument('--shipping_port_weekly_profiles', required=False, help='...') + p.add_argument('--shipping_port_hourly_profiles', required=False, help='...') + p.add_argument('--shipping_port_speciation_profiles', required=False, help='...') + + # ***** LIVESTOCK 
SECTOR ***** + p.add_argument('--livestock_source_pollutants', required=False, help='...') + p.add_argument('--animal_list', required=False, help='...') + p.add_argument('--gridded_livestock', required=False, help='...') + p.add_argument('--correction_split_factors', required=False, help='...') + p.add_argument('--denominator_yearly_factor_dir', required=False, help='...') + p.add_argument('--livestock_ef_files_dir', required=False, help='...') + p.add_argument('--livestock_monthly_profiles', required=False, help='...') + p.add_argument('--livestock_weekly_profiles', required=False, help='...') + p.add_argument('--livestock_hourly_profiles', required=False, help='...') + p.add_argument('--livestock_speciation_profiles', required=False, help='...') + + # ***** AGRICULTURAL SECTOR***** + p.add_argument('--land_uses_path', required=False, help='...') + p.add_argument('--land_use_by_nut_path', required=False, help='...') + p.add_argument('--crop_by_nut_path', required=False, help='...') + p.add_argument('--crop_from_landuse_path', required=False, help='...') + + # ***** CROP OPERATIONS SECTOR + p.add_argument('--crop_operations_source_pollutants', required=False, help='...') + p.add_argument('--crop_operations_list', required=False, help='...') + p.add_argument('--crop_operations_ef_files_dir', required=False, help='...') + p.add_argument('--crop_operations_monthly_profiles', required=False, help='...') + p.add_argument('--crop_operations_weekly_profiles', required=False, help='...') + p.add_argument('--crop_operations_hourly_profiles', required=False, help='...') + p.add_argument('--crop_operations_speciation_profiles', required=False, help='...') + + # ***** CROP FERTILIZERS SECTOR ***** + p.add_argument('--crop_fertilizers_source_pollutants', required=False, help='...') + p.add_argument('--crop_fertilizers_list', required=False, help='...') + p.add_argument('--cultivated_ratio', required=False, help='...') + p.add_argument('--fertilizers_rate', required=False, help='...') + p.add_argument('--crop_f_parameter', required=False, help='...') + p.add_argument('--crop_f_fertilizers', required=False, help='...') + p.add_argument('--gridded_ph', required=False, help='...') + p.add_argument('--gridded_cec', required=False, help='...') + p.add_argument('--fertilizers_denominator_yearly_factor_path', required=False, help='...') + p.add_argument('--crop_calendar', required=False, help='...') + p.add_argument('--crop_fertilizers_hourly_profiles', required=False, help='...') + p.add_argument('--crop_fertilizers_speciation_profiles', required=False, help='...') + p.add_argument('--crop_growing_degree_day_path', required=False, help='...') + + # ***** CROP MACHINERY SECTOR ***** + p.add_argument('--crop_machinery_source_pollutants', required=False, help='...') + p.add_argument('--crop_machinery_list', required=False, help='...') + p.add_argument('--machinery_list', required=False, help='...') + p.add_argument('--crop_machinery_deterioration_factor_path', required=False, help='...') + p.add_argument('--crop_machinery_load_factor_path', required=False, help='...') + p.add_argument('--crop_machinery_vehicle_ratio_path', required=False, help='...') + p.add_argument('--crop_machinery_vehicle_units_path', required=False, help='...') + p.add_argument('--crop_machinery_vehicle_workhours_path', required=False, help='...') + p.add_argument('--crop_machinery_vehicle_power_path', required=False, help='...') + p.add_argument('--crop_machinery_ef_path', required=False, help='...') + 
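The list-valued options in this block and the neighbouring sector blocks (crop_machinery_list, machinery_list, the *_source_pollutants options, ...) are written in hermes.conf as comma- or blank-separated strings and are normalised later in read_arguments() through Config._parse_list, defined at the end of this file. A minimal, self-contained illustration of that splitting rule, using values taken from the sample conf/hermes.conf in this patch (the helper name parse_list is only for the example):

```python
import re


def parse_list(str_list):
    # Same separator rule as Config._parse_list below: commas, semicolons or
    # blanks, with or without surrounding spaces, all delimit list items.
    try:
        return list(map(str, re.split(' , |, | ,|,| ; |; | ;|;| ', str_list)))
    except TypeError:
        # Options that are not set stay None.
        return None


print parse_list('barley, oats, rye, wheat')           # ['barley', 'oats', 'rye', 'wheat']
print parse_list('tractors, harvesters, rotavators')   # ['tractors', 'harvesters', 'rotavators']
print parse_list(None)                                  # None
```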
p.add_argument('--crop_machinery_monthly_profiles', required=False, help='...') + p.add_argument('--crop_machinery_weekly_profiles', required=False, help='...') + p.add_argument('--crop_machinery_hourly_profiles', required=False, help='...') + p.add_argument('--crop_machinery_speciation_map', required=False, help='...') + p.add_argument('--crop_machinery_speciation_profiles', required=False, help='...') + p.add_argument('--crop_machinery_by_nut', required=False, help='...') + + # ***** RESIDENTIAL SECTOR ***** + p.add_argument('--fuel_list', required=False, help='...') + p.add_argument('--residential_source_pollutants', required=False, help='...') + p.add_argument('--population_density_map', required=False, help='...') + p.add_argument('--population_type_map', required=False, help='...') + p.add_argument('--population_type_by_ccaa', required=False, help='...') + p.add_argument('--population_type_by_prov', required=False, help='...') + p.add_argument('--energy_consumption_by_prov', required=False, help='...') + p.add_argument('--energy_consumption_by_ccaa', required=False, help='...') + p.add_argument('--residential_spatial_proxies', required=False, help='...') + p.add_argument('--residential_ef_files_path', required=False, help='...') + p.add_argument('--residential_heating_degree_day_path', required=False, help='...') + p.add_argument('--residential_hourly_profiles', required=False, help='...') + p.add_argument('--residential_speciation_profiles', required=False, help='...') + + # ***** RECREATIONAL BOATS SECTOR ***** + p.add_argument('--recreational_boats_source_pollutants', required=False, help='...') + p.add_argument('--recreational_boats_list', required=False, help='...') + p.add_argument('--recreational_boats_density_map', required=False, help='...') + p.add_argument('--recreational_boats_by_type', required=False, help='...') + p.add_argument('--recreational_boats_ef_path', required=False, help='...') + p.add_argument('--recreational_boats_monthly_profiles', required=False, help='...') + p.add_argument('--recreational_boats_weekly_profiles', required=False, help='...') + p.add_argument('--recreational_boats_hourly_profiles', required=False, help='...') + p.add_argument('--recreational_boats_speciation_profiles', required=False, help='...') + + # ***** POINT SOURCE SECTOR ***** + p.add_argument('--point_source_pollutants', required=False, help='...') + p.add_argument('--plume_rise', required=False, help='...') + p.add_argument('--point_source_snaps', required=False, help='...') + p.add_argument('--point_source_catalog', required=False, help='...') + p.add_argument('--point_source_monthly_profiles', required=False, help='...') + p.add_argument('--point_source_weekly_profiles', required=False, help='...') + p.add_argument('--point_source_hourly_profiles', required=False, help='...') + p.add_argument('--point_source_speciation_profiles', required=False, help='...') + p.add_argument('--point_source_measured_emissions', required=False, help='...') + + # ***** TRAFFIC SECTOR ***** + p.add_argument('--do_hot', required=False, help='...') + p.add_argument('--do_cold', required=False, help='...') + p.add_argument('--do_tyre_wear', required=False, help='...') + p.add_argument('--do_brake_wear', required=False, help='...') + p.add_argument('--do_road_wear', required=False, help='...') + p.add_argument('--do_resuspension', required=False, help='...') + p.add_argument('--resuspension_correction', required=False, help='...') + p.add_argument('--write_rline', required=False, help='...') + + 
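The path options defined just below for the traffic sector (road_link_path, fleet_compo_path, traffic_ef_path, the profile paths, ...) behave like the other sector paths: in hermes.conf they are written relative to placeholder tokens that the substitution loop further down in read_arguments() replaces with data_path, input_dir and domain_type. The sketch below only illustrates that idea; the token spellings (<data_path>, <input_dir>, <domain_type>) are assumptions used for the example, and the values come from the sample conf/hermes.conf in this patch:

```python
# Simplified sketch of the token expansion applied to every string argument in
# read_arguments(); the angle-bracket token names are illustrative assumptions.
def expand(value, data_path, input_dir, domain_type):
    value = value.replace('<data_path>', data_path)
    value = value.replace('<input_dir>', input_dir)
    value = value.replace('<domain_type>', domain_type)
    return value


print expand('<input_dir>/traffic/road_links/2015/road_links_2015.shp',
             data_path='/esarchive/recon',
             input_dir='/home/Earth/ctena/Models/hermesv3_bu_data',
             domain_type='lcc')
# -> /home/Earth/ctena/Models/hermesv3_bu_data/traffic/road_links/2015/road_links_2015.shp
```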
p.add_argument('--traffic_pollutants', required=False, help='...') + p.add_argument('--vehicle_types', required=False, help='...') + p.add_argument('--load', type=float, required=False, help='...') + p.add_argument('--road_link_path', required=False, help='...') + p.add_argument('--fleet_compo_path', required=False, help='...') + p.add_argument('--traffic_ef_path', required=False, help='...') + p.add_argument('--traffic_speed_hourly_path', required=False, help='...') + p.add_argument('--traffic_monthly_profiles', required=False, help='...') + p.add_argument('--traffic_weekly_profiles', required=False, help='...') + p.add_argument('--traffic_hourly_profiles_mean', required=False, help='...') + p.add_argument('--traffic_hourly_profiles_weekday', required=False, help='...') + p.add_argument('--traffic_hourly_profiles_saturday', required=False, help='...') + p.add_argument('--traffic_hourly_profiles_sunday', required=False, help='...') + p.add_argument('--traffic_speciation_profile_hot_cold', required=False, help='...') + p.add_argument('--traffic_speciation_profile_tyre', required=False, help='...') + p.add_argument('--traffic_speciation_profile_road', required=False, help='...') + p.add_argument('--traffic_speciation_profile_brake', required=False, help='...') + p.add_argument('--traffic_speciation_profile_resuspension', required=False, help='...') + + # ***** TRAFFIC AREA SECTOR ***** + p.add_argument('--traffic_area_pollutants', required=False, help='...') + p.add_argument('--do_evaporative', required=False, help='...') + p.add_argument('--traffic_area_gas_path', required=False, help='...') + p.add_argument('--popullation_by_municipality', required=False, help='...') + p.add_argument('--traffic_area_speciation_profiles_evaporative', required=False, help='...') + p.add_argument('--traffic_area_evaporative_ef_file', required=False, help='...') + p.add_argument('--do_small_cities', required=False, help='...') + p.add_argument('--traffic_area_small_cities_path', required=False, help='...') + p.add_argument('--traffic_area_speciation_profiles_small_cities', required=False, help='...') + p.add_argument('--traffic_area_small_cities_ef_file', required=False, help='...') + p.add_argument('--small_cities_hourly_profile', required=False, help='...') + p.add_argument('--small_cities_weekly_profile', required=False, help='...') + p.add_argument('--small_cities_monthly_profile', required=False, help='...') + + arguments = p.parse_args() + + for item in vars(arguments): + is_str = isinstance(arguments.__dict__[item], str) if is_str: - exec("options.{0} = options.{0}.replace('', options.input_dir)".format(item)) - exec("options.{0} = options.{0}.replace('', options.domain_type)".format(item)) - if options.domain_type == 'global': - exec("options.{0} = options.{0}.replace('', '{1}_{2}')".format( - item, options.inc_lat, options.inc_lon)) - elif options.domain_type == 'rotated': - exec("options.{0} = options.{0}.replace('', '{1}_{2}')".format( - item, options.inc_rlat, options.inc_rlon)) - elif options.domain_type == 'lcc' or options.domain_type == 'mercator': - exec("options.{0} = options.{0}.replace('', '{1}_{2}')".format( - item, options.inc_x, options.inc_y)) - - options.start_date = self._parse_start_date(options.start_date) - options.end_date = self._parse_end_date(options.end_date, options.start_date) - - self.create_dir(options.output_dir) - self.create_dir(options.auxiliar_files_path) - - return options - - def get_output_name(self, date): + arguments.__dict__[item] = 
arguments.__dict__[item].replace('', arguments.data_path) + arguments.__dict__[item] = arguments.__dict__[item].replace('', arguments.input_dir) + arguments.__dict__[item] = arguments.__dict__[item].replace('', arguments.domain_type) + + if arguments.domain_type == 'regular': + arguments.__dict__[item] = arguments.__dict__[item].replace('', '{1}_{2}'.format( + item, arguments.inc_lat, arguments.inc_lon)) + elif arguments.domain_type == 'rotated': + arguments.__dict__[item] = arguments.__dict__[item].replace('', '{1}_{2}'.format( + item, arguments.inc_rlat, arguments.inc_rlon)) + elif arguments.domain_type == 'lcc' or arguments.domain_type == 'mercator': + arguments.__dict__[item] = arguments.__dict__[item].replace('', '{1}_{2}'.format( + item, arguments.inc_x, arguments.inc_y)) + + arguments.emission_summary = self._parse_bool(arguments.emission_summary) + arguments.start_date = self._parse_start_date(arguments.start_date) + arguments.end_date = self._parse_end_date(arguments.end_date, arguments.start_date) + arguments.output_name = self.get_output_name(arguments) + + arguments.erase_auxiliary_files = self._parse_bool(arguments.erase_auxiliary_files) + self.create_dir(arguments.output_dir) + + if arguments.erase_auxiliary_files: + if os.path.exists(arguments.auxiliary_files_path): + comm = MPI.COMM_WORLD + if comm.Get_rank() == 0: + rmtree(arguments.auxiliary_files_path) + comm.Barrier() + self.create_dir(arguments.auxiliary_files_path) + + arguments.do_traffic = arguments.traffic_processors > 0 + arguments.do_traffic_area = arguments.traffic_area_processors > 0 + arguments.do_aviation = arguments.aviation_processors > 0 + arguments.do_point_sources = arguments.point_sources_processors > 0 + arguments.do_recreational_boats = arguments.recreational_boats_processors > 0 + arguments.do_shipping_port = arguments.shipping_port_processors > 0 + arguments.do_residential = arguments.residential_processors > 0 + arguments.do_livestock = arguments.livestock_processors > 0 + arguments.do_crop_operations = arguments.crop_operations_processors > 0 + arguments.do_crop_fertilizers = arguments.crop_fertilizers_processors > 0 + arguments.do_agricultural_machinery = arguments.agricultural_machinery_processors > 0 + + # Aviation lists + arguments.airport_list = self._parse_list(arguments.airport_list) + arguments.plane_list = self._parse_list(arguments.plane_list) + arguments.aviation_source_pollutants = self._parse_list(arguments.aviation_source_pollutants) + + # Shipping Port lists + arguments.shipping_port_source_pollutants = self._parse_list(arguments.shipping_port_source_pollutants) + arguments.vessel_list = self._parse_list(arguments.vessel_list) + arguments.port_list = self._parse_list(arguments.port_list) + + # Livestock lists + arguments.livestock_source_pollutants = self._parse_list(arguments.livestock_source_pollutants) + arguments.animal_list = self._parse_list(arguments.animal_list) + + # Crop operations lists + arguments.crop_operations_source_pollutants = self._parse_list(arguments.crop_operations_source_pollutants) + arguments.crop_operations_list = self._parse_list(arguments.crop_operations_list) + + # Crop fertilizers lists + arguments.crop_fertilizers_source_pollutants = self._parse_list(arguments.crop_fertilizers_source_pollutants) + arguments.crop_fertilizers_list = self._parse_list(arguments.crop_fertilizers_list) + + # Crop machinery lists + arguments.crop_machinery_source_pollutants = self._parse_list(arguments.crop_machinery_source_pollutants) + arguments.crop_machinery_list = 
self._parse_list(arguments.crop_machinery_list) + arguments.machinery_list = self._parse_list(arguments.machinery_list) + + # Residential lists + arguments.fuel_list = self._parse_list(arguments.fuel_list) + arguments.residential_source_pollutants = self._parse_list(arguments.residential_source_pollutants) + + # Recreational Boats lists + arguments.recreational_boats_source_pollutants = self._parse_list( + arguments.recreational_boats_source_pollutants) + arguments.recreational_boats_list = self._parse_list(arguments.recreational_boats_list) + + # Point Source bools + arguments.plume_rise = self._parse_bool(arguments.plume_rise) + + # Point Source lists + arguments.point_source_pollutants = self._parse_list(arguments.point_source_pollutants) + arguments.point_source_snaps = self._parse_list(arguments.point_source_snaps) + + # Traffic bools + arguments.do_hot = self._parse_bool(arguments.do_hot) + arguments.do_cold = self._parse_bool(arguments.do_cold) + arguments.do_tyre_wear = self._parse_bool(arguments.do_tyre_wear) + arguments.do_brake_wear = self._parse_bool(arguments.do_brake_wear) + arguments.do_road_wear = self._parse_bool(arguments.do_road_wear) + arguments.do_resuspension = self._parse_bool(arguments.do_resuspension) + arguments.resuspension_correction = self._parse_bool(arguments.resuspension_correction) + arguments.write_rline = self._parse_bool(arguments.write_rline) + + # Traffic lists + arguments.traffic_pollutants = self._parse_list(arguments.traffic_pollutants) + arguments.vehicle_types = self._parse_list(arguments.vehicle_types) + + # Traffic area bools + arguments.do_evaporative = self._parse_bool(arguments.do_evaporative) + arguments.do_small_cities = self._parse_bool(arguments.do_small_cities) + + # Traffic area lists + arguments.traffic_area_pollutants = self._parse_list(arguments.traffic_area_pollutants) + + return arguments + + @staticmethod + def get_output_name(arguments): """ - Generates the full path of the output replacing by YYYYMMDDHH, YYYYMMDD, YYYYMM or YYYY depending on the - output_timestep_type. + Generates the full path of the output replacing by YYYYMMDDHH. - :param date: Date of the day to simulate. - :type: datetime.datetime + :param arguments: Config file arguments. + :type arguments: Namespace :return: Complete path to the output file. :rtype: str """ import os - if self.options.output_timestep_type == 'hourly': - file_name = self.options.output_name.replace('', date.strftime('%Y%m%d%H')) - elif self.options.output_timestep_type == 'daily': - file_name = self.options.output_name.replace('', date.strftime('%Y%m%d')) - elif self.options.output_timestep_type == 'monthly': - file_name = self.options.output_name.replace('', date.strftime('%Y%m')) - elif self.options.output_timestep_type == 'yearly': - file_name = self.options.output_name.replace('', date.strftime('%Y')) - else: - file_name = self.options.output_name - full_path = os.path.join(self.options.output_dir, file_name) + file_name = arguments.output_name.replace('', arguments.start_date.strftime('%Y%m%d%H')) + + full_path = os.path.join(arguments.output_dir, file_name) return full_path @staticmethod @@ -245,8 +540,7 @@ class Config(ArgParser): print '/t Using False as default' return False - @staticmethod - def _parse_start_date(str_date): + def _parse_start_date(self, str_date): """ Parse the date form string to datetime. It accepts several ways to introduce the date: @@ -254,15 +548,18 @@ class Config(ArgParser): YYYY/MM/DD hh:mm:ss, YYYY-MM-DD hh:mm:ss, YYYY/MM/DD_hh, YYYY-MM-DD_hh. 
:param str_date: Date to the day to simulate in string format. - :type str_date: str + :type str_date: str, datetime :return: Date to the day to simulate in datetime format. :rtype: datetime.datetime """ from datetime import datetime + + if self.new_date is not None: + return self.new_date + format_types = ['%Y%m%d', '%Y%m%d%H', '%Y%m%d.%H', '%Y/%m/%d_%H:%M:%S', '%Y-%m-%d_%H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S', '%Y/%m/%d_%H', '%Y-%m-%d_%H', '%Y/%m/%d'] - date = None for date_format in format_types: try: @@ -283,7 +580,7 @@ class Config(ArgParser): If it's not defined it will be the same date that start_date (to do only one day). :param end_date: Date to the last day to simulate in string format. - :type end_date: str + :type end_date: str, datetime :param start_date: Date to the first day to simulate. :type start_date: datetime.datetime @@ -293,17 +590,12 @@ class Config(ArgParser): """ if end_date is None: return start_date - else: - return self._parse_start_date(end_date) + return self._parse_start_date(end_date) - def set_log_level(self): - """ - Defines the log_level using the common script settings. - """ - import settings - settings.define_global_vars(self.options.log_level) - - -if __name__ == '__main__': - config = Config() - print config.options + @staticmethod + def _parse_list(str_list): + import re + try: + return list(map(str, re.split(' , |, | ,|,| ; |; | ;|;| ', str_list))) + except TypeError: + return None diff --git a/hermesv3_bu/config/settings.py b/hermesv3_bu/config/settings.py deleted file mode 100644 index 1b93cfa1182ba517531a30737ae119b8c81f5942..0000000000000000000000000000000000000000 --- a/hermesv3_bu/config/settings.py +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Earth Sciences Department, BSC-CNS -# -# This file is part of HERMESv3_GR. -# -# HERMESv3_GR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# HERMESv3_GR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with HERMESv3_GR. If not, see . 
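For reference, the two parsers defined above amount to trying each accepted strptime layout in turn and splitting on commas, semicolons or blanks. A self-contained sketch with the same behaviour follows; the helper names are illustrative and the split regex is a compacted equivalent of the original pattern.

    import re
    from datetime import datetime

    def parse_date(str_date):
        # Try every accepted layout until one matches.
        formats = ['%Y%m%d', '%Y%m%d%H', '%Y%m%d.%H', '%Y/%m/%d_%H:%M:%S', '%Y-%m-%d_%H:%M:%S',
                   '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S', '%Y/%m/%d_%H', '%Y-%m-%d_%H', '%Y/%m/%d']
        for fmt in formats:
            try:
                return datetime.strptime(str_date, fmt)
            except ValueError:
                pass
        raise ValueError("Unknown date format: {0}".format(str_date))

    def parse_list(str_list):
        # Accept ',', ';' or blank separated items, with or without surrounding spaces.
        if str_list is None:
            return None
        return [x for x in re.split(r'\s*[,;]\s*|\s+', str_list.strip()) if x]

    # parse_date('2019/01/01 00:00:00') -> datetime(2019, 1, 1, 0, 0)
    # parse_list('nox_no2, so2 ; nh3')  -> ['nox_no2', 'so2', 'nh3']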
- - -import os -import numpy as np - -global refresh_log - -global precision -precision = np.float64 - -global writing_serial -writing_serial = False - -global compressed_netcdf -compressed_netcdf = True - -if not writing_serial: - compressed_netcdf = False - -global icomm -global comm -global rank -global size - -global log_level -global log_file -global df_times - - -def define_global_vars(in_log_level): - # TODO Documentation - from mpi4py import MPI - - global icomm - global comm - global rank - global size - - icomm = MPI.COMM_WORLD - comm = icomm.Split(color=0, key=0) - rank = comm.Get_rank() - size = comm.Get_size() - - global log_level - log_level = in_log_level - - -def define_log_file(log_path, date): - # TODO Documentation - log_path = os.path.join(log_path, 'logs') - if not os.path.exists(log_path): - if rank == 0: - os.makedirs(log_path) - comm.Barrier() - log_path = os.path.join(log_path, 'HERMESv3_{0}_Rank{1}_Procs{2}.log'.format( - date.strftime('%Y%m%d%H'), str(rank).zfill(4), str(size).zfill(4))) - if os.path.exists(log_path): - os.remove(log_path) - - global log_file - - log_file = open(log_path, mode='w') - - -def define_times_file(): - # TODO Documentation - import pandas as pd - global df_times - - df_times = pd.DataFrame(columns=['Class', 'Function', rank]) - - -def write_log(msg, level=1): - # TODO Documentation - if log_level >= level: - log_file.write(msg + '\n') - log_file.flush() - - -def write_time(module, func, time, level=1): - # TODO Documentation - global df_times - if log_level >= level: - df_times = df_times.append({'Class': module, 'Function': func, rank: time}, ignore_index=True) - - -def finish_logs(output_dir, date): - # TODO Documentation - import pandas as pd - from functools import reduce - log_file.close() - - global df_times - df_times = df_times.groupby(['Class', 'Function']).sum().reset_index() - data_frames = comm.gather(df_times, root=0) - if rank == 0: - times_path = os.path.join(output_dir, 'logs', 'HERMESv3_{0}_times_Procs{1}.csv'.format( - date.strftime('%Y%m%d%H'), str(size).zfill(4))) - if os.path.exists(times_path): - os.remove(times_path) - df_merged = reduce(lambda left, right: pd.merge(left, right, on=['Class', 'Function'], how='outer'), - data_frames) - df_merged['min'] = df_merged.loc[:, range(size)].min(axis=1) - df_merged['max'] = df_merged.loc[:, range(size)].max(axis=1) - df_merged['mean'] = df_merged.loc[:, range(size)].mean(axis=1) - - df_merged.to_csv(times_path) - comm.Barrier() diff --git a/hermesv3_bu/modules/grids/__init__.py b/hermesv3_bu/grids/__init__.py old mode 100644 new mode 100755 similarity index 100% rename from hermesv3_bu/modules/grids/__init__.py rename to hermesv3_bu/grids/__init__.py diff --git a/hermesv3_bu/grids/grid.py b/hermesv3_bu/grids/grid.py new file mode 100755 index 0000000000000000000000000000000000000000..4269f20c660ab71f7842a0ea200d0ea5d62c3b6b --- /dev/null +++ b/hermesv3_bu/grids/grid.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python + +import os +import timeit +import numpy as np + +from hermesv3_bu.logger.log import Log + + +def select_grid(comm, logger, arguments): + """ + Create and initialise the output grid. + + :param comm: MPI communicator. + + :param logger: Logger + :type logger: Log + + :param arguments: Dictionary with all the necessary arguments to initialise the grid + :type arguments: namespace + + :return: Desired output grid. 
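select_grid builds the grid object on rank 0 only and broadcasts the finished (pickled) object, so the slow auxiliary shapefile and NetCDF creation happens once. The pattern reduces to the following sketch, where the build callable is a placeholder.

    from mpi4py import MPI

    def build_on_root(comm, build_function):
        # Heavy construction on rank 0 only; the result is broadcast to every other rank.
        if comm.Get_rank() == 0:
            obj = build_function()
        else:
            obj = None
        obj = comm.bcast(obj, root=0)
        return obj

    # Illustrative use: grid = build_on_root(MPI.COMM_WORLD, lambda: expensive_grid_setup())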
+ :rtype: Grid + """ + spent_time = timeit.default_timer() + if comm.Get_rank() == 0: + if arguments.domain_type == 'regular': + from hermesv3_bu.grids.grid_latlon import LatLonGrid + grid = LatLonGrid( + comm, logger, arguments.auxiliary_files_path, arguments.output_timestep_num, + arguments.vertical_description, arguments.inc_lat, arguments.inc_lon, arguments.lat_orig, + arguments.lon_orig, arguments.n_lat, arguments.n_lon) + + elif arguments.domain_type == 'lcc': + from hermesv3_bu.grids.grid_lcc import LccGrid + grid = LccGrid( + comm, logger, arguments.auxiliary_files_path, arguments.output_timestep_num, + arguments.vertical_description, arguments.lat_1, arguments.lat_2, arguments.lon_0, arguments.lat_0, + arguments.nx, arguments.ny, arguments.inc_x, arguments.inc_y, arguments.x_0, arguments.y_0) + + elif arguments.domain_type == 'rotated': + from hermesv3_bu.grids.grid_rotated import RotatedGrid + grid = RotatedGrid( + comm, logger, arguments.auxiliary_files_path, arguments.output_timestep_num, + arguments.vertical_description, arguments.centre_lat, arguments.centre_lon, arguments.west_boundary, + arguments.south_boundary, arguments.inc_rlat, arguments.inc_rlon) + + elif arguments.domain_type == 'mercator': + from hermesv3_bu.grids.grid_mercator import MercatorGrid + grid = MercatorGrid( + comm, logger, arguments.auxiliary_files_path, arguments.output_timestep_num, + arguments.vertical_description, arguments.lat_ts, arguments.lon_0, arguments.nx, arguments.ny, + arguments.inc_x, arguments.inc_y, arguments.x_0, arguments.y_0) + + else: + raise NameError('Unknown grid type {0}'.format(arguments.domain_type)) + else: + grid = None + + grid = comm.bcast(grid, root=0) + logger.write_time_log('Grid', 'select_grid', timeit.default_timer() - spent_time) + return grid + + +class Grid(object): + + def __init__(self, comm, logger, attributes, auxiliary_path, vertical_description_path): + """ + Initialise the Grid class + + :param logger: Logger + :type logger: Log + + :param attributes: Attributes to define the grid + :type attributes: dict + + :param auxiliary_path: Path to the folder to store all the needed auxiliary files. + :type auxiliary_path: str + + :param vertical_description_path: Path to the file that describes the vertical resolution + :type vertical_description_path: str + """ + spent_time = timeit.default_timer() + self.comm = comm + self.logger = logger + self.logger.write_log('\tGrid specifications: {0}'.format(attributes), 3) + self.attributes = attributes + self.netcdf_path = os.path.join(auxiliary_path, 'grid', 'grid.nc') + self.shapefile_path = os.path.join(auxiliary_path, 'grid', 'grid.shp') + + self.center_latitudes = None + self.center_longitudes = None + self.boundary_latitudes = None + self.boundary_longitudes = None + self.shape = None + self.create_coords() + self.write_netcdf() + + self.vertical_desctiption = self.get_vertical_description(vertical_description_path) + self.shapefile = self.create_shapefile() + + logger.write_time_log('Grid', '__init__', timeit.default_timer() - spent_time) + + def get_vertical_description(self, path): + """ + Extract the vertical description of the desired output. + + :param path: Path to the file that contains the output vertical description. + :type path: str + + :return: Heights of the output vertical layers. 
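get_vertical_description only relies on a height_magl column. A plausible profile file and the corresponding read would look like the sketch below; the extra column name is an assumption about the profile layout.

    import io
    import pandas as pd

    # Hypothetical vertical description content: one row per output layer.
    csv_text = u"Ilayer,height_magl\n1,20.0\n2,75.0\n3,170.0\n"

    df = pd.read_csv(io.StringIO(csv_text), sep=',')
    heights = df.height_magl.values   # -> array([ 20.,  75., 170.])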
+ :rtype: list + """ + import pandas as pd + spent_time = timeit.default_timer() + df = pd.read_csv(path, sep=',') + + heights = df.height_magl.values + self.logger.write_time_log('Grid', 'get_vertical_description', timeit.default_timer() - spent_time, 3) + return heights + + def write_netcdf(self): + """ + Implemented on inner classes + """ + pass + + def create_coords(self): + """ + Implemented on inner classes + """ + pass + + def create_bounds(self, coordinates, inc, number_vertices=2, inverse=False): + """ + Calculate the vertices coordinates. + + :param coordinates: Coordinates in degrees (latitude or longitude) + :type coordinates: numpy.array + + :param inc: Increment between center values. + :type inc: float + + :param number_vertices: Non mandatory parameter that informs the number of vertices that must have the + boundaries (by default 2). + :type number_vertices: int + + :param inverse: For some grid latitudes. + :type inverse: bool + + :return: Array with as many elements as vertices for each value of coords. + :rtype: numpy.array + """ + spent_time = timeit.default_timer() + # Create new arrays moving the centers half increment less and more. + coords_left = coordinates - inc / 2 + coords_right = coordinates + inc / 2 + + # Defining the number of corners needed. 2 to regular grids and 4 for irregular ones. + if number_vertices == 2: + # Create an array of N arrays of 2 elements to store the floor and the ceil values for each cell + bound_coords = np.dstack((coords_left, coords_right)) + bound_coords = bound_coords.reshape((len(coordinates), number_vertices)) + elif number_vertices == 4: + # Create an array of N arrays of 4 elements to store the corner values for each cell + # It can be stored in clockwise starting form the left-top element, or in inverse mode. + if inverse: + bound_coords = np.dstack((coords_left, coords_left, coords_right, coords_right)) + else: + bound_coords = np.dstack((coords_left, coords_right, coords_right, coords_left)) + else: + raise ValueError('ERROR: The number of vertices of the boundaries must be 2 or 4.') + self.logger.write_time_log('Grid', 'create_bounds', timeit.default_timer() - spent_time, 3) + return bound_coords + + def create_shapefile(self): + """ + Create a shapefile with the grid. 
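A quick worked example of create_bounds, using three centres one degree apart; the values follow directly from the code above.

    import numpy as np

    centres = np.array([0.5, 1.5, 2.5])
    inc = 1.0

    # number_vertices=2: floor/ceil pair per cell -> [[0., 1.], [1., 2.], [2., 3.]]
    bounds_2 = np.dstack((centres - inc / 2, centres + inc / 2)).reshape((len(centres), 2))

    # number_vertices=4 (non-inverse): the same edges repeated clockwise from the left edge.
    # shape (1, 3, 4): [[0., 1., 1., 0.], [1., 2., 2., 1.], [2., 3., 3., 2.]]
    bounds_4 = np.dstack((centres - inc / 2, centres + inc / 2, centres + inc / 2, centres - inc / 2))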
+ + :return: Grid shapefile + :rtype: GeoDataFrame + """ + import geopandas as gpd + import pandas as pd + from shapely.geometry import Polygon + spent_time = timeit.default_timer() + + if not os.path.exists(self.shapefile_path): + if not os.path.exists(os.path.dirname(self.shapefile_path)): + os.makedirs(os.path.dirname(self.shapefile_path)) + + y = self.boundary_latitudes + x = self.boundary_longitudes + + if self.grid_type == 'Regular Lat-Lon': + x = x.reshape((x.shape[1], x.shape[2])) + y = y.reshape((y.shape[1], y.shape[2])) + + aux_shape = (y.shape[0], x.shape[0], 4) + x_aux = np.empty(aux_shape) + x_aux[:, :, 0] = x[np.newaxis, :, 0] + x_aux[:, :, 1] = x[np.newaxis, :, 1] + x_aux[:, :, 2] = x[np.newaxis, :, 1] + x_aux[:, :, 3] = x[np.newaxis, :, 0] + + x = x_aux + del x_aux + + y_aux = np.empty(aux_shape) + y_aux[:, :, 0] = y[:, np.newaxis, 0] + y_aux[:, :, 1] = y[:, np.newaxis, 0] + y_aux[:, :, 2] = y[:, np.newaxis, 1] + y_aux[:, :, 3] = y[:, np.newaxis, 1] + + y = y_aux + del y_aux + + aux_b_lats = y.reshape((y.shape[0] * y.shape[1], y.shape[2])) + aux_b_lons = x.reshape((x.shape[0] * x.shape[1], x.shape[2])) + + # Create one dataframe with 8 columns, 4 points with two coordinates each one + df_lats = pd.DataFrame(aux_b_lats, columns=['b_lat_1', 'b_lat_2', 'b_lat_3', 'b_lat_4']) + df_lons = pd.DataFrame(aux_b_lons, columns=['b_lon_1', 'b_lon_2', 'b_lon_3', 'b_lon_4']) + df = pd.concat([df_lats, df_lons], axis=1) + + # Substituate 8 columns by 4 with the two coordinates + df['p1'] = zip(df.b_lon_1, df.b_lat_1) + del df['b_lat_1'], df['b_lon_1'] + df['p2'] = zip(df.b_lon_2, df.b_lat_2) + del df['b_lat_2'], df['b_lon_2'] + df['p3'] = zip(df.b_lon_3, df.b_lat_3) + del df['b_lat_3'], df['b_lon_3'] + df['p4'] = zip(df.b_lon_4, df.b_lat_4) + del df['b_lat_4'], df['b_lon_4'] + + # Make a list of list of tuples + list_points = df.values + del df['p1'], df['p2'], df['p3'], df['p4'] + + # List of polygons from the list of points + geometry = [Polygon(list(points)) for points in list_points] + + gdf = gpd.GeoDataFrame(index=df.index, crs={'init': 'epsg:4326'}, geometry=geometry) + gdf = gdf.to_crs(self.attributes['crs']) + gdf['FID'] = gdf.index + gdf.to_file(self.shapefile_path) + + else: + gdf = gpd.read_file(self.shapefile_path) + + # gdf.set_index('FID', inplace=True, drop=False) + self.logger.write_time_log('Grid', 'create_shapefile', timeit.default_timer() - spent_time, 2) + + return gdf + + def add_cell_area(self): + from cdo import Cdo + # spent_time = timeit.default_timer() + + # Initialises the CDO + cdo = Cdo() + cell_area = cdo.gridarea(input=self.netcdf_path, returnArray='cell_area') + self.shapefile['cell_area'] = cell_area.flatten() + + # self.logger.write_time_log('Grid', 'add_cell_area', timeit.default_timer() - spent_time) diff --git a/hermesv3_bu/grids/grid_latlon.py b/hermesv3_bu/grids/grid_latlon.py new file mode 100755 index 0000000000000000000000000000000000000000..8b3def85528278cfdf111b300325e19d1d6706eb --- /dev/null +++ b/hermesv3_bu/grids/grid_latlon.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python + +# Copyright 2018 Earth Sciences Department, BSC-CNS +# +# This file is part of HERMESv3_GR. +# +# HERMESv3_GR is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
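create_shapefile boils down to turning the four boundary corners of each cell into a shapely Polygon inside a GeoDataFrame. A compact, standalone sketch of that step with made-up corner arrays:

    import numpy as np
    import geopandas as gpd
    from shapely.geometry import Polygon

    # Hypothetical corners: two cells, four (lon, lat) vertices each.
    lon_corners = np.array([[0.0, 1.0, 1.0, 0.0], [1.0, 2.0, 2.0, 1.0]])
    lat_corners = np.array([[40.0, 40.0, 41.0, 41.0], [40.0, 40.0, 41.0, 41.0]])

    geometry = [Polygon(list(zip(lons, lats))) for lons, lats in zip(lon_corners, lat_corners)]
    gdf = gpd.GeoDataFrame({'FID': range(len(geometry))}, geometry=geometry, crs={'init': 'epsg:4326'})
    # gdf.to_crs(...) and gdf.to_file(...) would follow, as in create_shapefile().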
+# +# HERMESv3_GR is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HERMESv3_GR. If not, see . + + +import os +import timeit + +import numpy as np +from hermesv3_bu.grids.grid import Grid +from hermesv3_bu.io_server.io_netcdf import write_coords_netcdf +from hermesv3_bu.logger.log import Log + + +class LatLonGrid(Grid): + + def __init__(self, comm, logger, auxiliary_path, tstep_num, vertical_description_path, inc_lat, inc_lon, lat_orig, + lon_orig, n_lat, n_lon): + """ + Regional regular lat-lon grid object that contains all the information to do a global output. + + :param logger: Logger. + :type logger: Log + + :param auxiliary_path: Path to the folder to store all the needed auxiliary files. + :type auxiliary_path: str + + :param tstep_num: Number of time steps. + :type tstep_num: int + + :param vertical_description_path: Path to the file that describes the vertical resolution + :type vertical_description_path: str + + :param inc_lat: Increment between latitude centroids. + :type inc_lat: float + + :param inc_lon: Increment between longitude centroids. + :type inc_lon: float + + :param lat_orig: Location of the latitude of the corner of the first cell (down left). + :type lat_orig: float + + :param lon_orig: Location of the longitude of the corner of the first cell (down left). + :type lon_orig: float + + :param n_lat: Number of cells on the latitude direction. + :type n_lat = int + + :param n_lon: Number of cells on the latitude direction. + :type n_lon = int + """ + spent_time = timeit.default_timer() + logger.write_log('Regular Lat-Lon grid selected.') + self.grid_type = 'Regular Lat-Lon' + attributes = {'inc_lat': inc_lat, 'inc_lon': inc_lon, 'lat_orig': lat_orig, 'lon_orig': lon_orig, + 'n_lat': n_lat, 'n_lon': n_lon, 'crs': {'init': 'epsg:4326'}} + # Initialize the class using parent + super(LatLonGrid, self).__init__(comm, logger, attributes, auxiliary_path, vertical_description_path) + + self.shape = (tstep_num, len(self.vertical_desctiption), n_lat, n_lon) + + self.logger.write_time_log('LatLonGrid', '__init__', timeit.default_timer() - spent_time) + + def create_coords(self): + """ + Create the coordinates for a global domain. 
+ """ + spent_time = timeit.default_timer() + # From corner latitude /longitude to center ones + lat_c_orig = self.attributes['lat_orig'] + (self.attributes['inc_lat'] / 2) + self.center_latitudes = np.linspace( + lat_c_orig, lat_c_orig + (self.attributes['inc_lat'] * (self.attributes['n_lat'] - 1)), + self.attributes['n_lat'], dtype=np.float) + self.boundary_latitudes = self.create_bounds(self.center_latitudes, self.attributes['inc_lat']) + + # ===== Longitudes ===== + lon_c_orig = self.attributes['lon_orig'] + (self.attributes['inc_lon'] / 2) + self.center_longitudes = np.linspace( + lon_c_orig, lon_c_orig + (self.attributes['inc_lon'] * (self.attributes['n_lon'] - 1)), + self.attributes['n_lon'], dtype=np.float) + + self.boundary_longitudes = self.create_bounds(self.center_longitudes, self.attributes['inc_lon']) + + self.boundary_latitudes = self.boundary_latitudes.reshape((1,) + self.boundary_latitudes.shape) + self.boundary_longitudes = self.boundary_longitudes.reshape((1,) + self.boundary_longitudes.shape) + + self.logger.write_time_log('LatLonGrid', 'create_coords', timeit.default_timer() - spent_time, 2) + + def write_netcdf(self): + """ + Write a regular lat-lon grid NetCDF with empty data + """ + spent_time = timeit.default_timer() + if not os.path.exists(self.netcdf_path): + if not os.path.exists(os.path.dirname(self.netcdf_path)): + os.makedirs(os.path.dirname(self.netcdf_path)) + # Writes an auxiliary empty NetCDF only with the coordinates and an empty variable. + write_coords_netcdf(self.netcdf_path, self.center_latitudes, self.center_longitudes, + [{'name': 'var_aux', 'units': '', 'data': 0}], + boundary_latitudes=self.boundary_latitudes, + boundary_longitudes=self.boundary_longitudes, + regular_latlon=True) + + self.logger.write_log("\tGrid created at '{0}'".format(self.netcdf_path), 3) + self.logger.write_time_log('LatLonGrid', 'write_netcdf', timeit.default_timer() - spent_time, 3) diff --git a/hermesv3_bu/grids/grid_lcc.py b/hermesv3_bu/grids/grid_lcc.py new file mode 100755 index 0000000000000000000000000000000000000000..c5ae6a0ec583988c461209424f8db613a289f3b6 --- /dev/null +++ b/hermesv3_bu/grids/grid_lcc.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python + +import os +import timeit +import numpy as np +from pyproj import Proj +from grid import Grid + +from hermesv3_bu.logger.log import Log + + +class LccGrid(Grid): + + def __init__(self, comm, logger, auxiliary_path, tstep_num, vertical_description_path, lat_1, lat_2, lon_0, lat_0, + nx, ny, inc_x, inc_y, x_0, y_0, earth_radius=6370000.000): + """ + Lambert Conformal Conic (LCC) grid object that contains all the information to do a lcc output. + + :param logger: Logger. + :type logger: Log + + :param auxiliary_path: Path to the folder to store all the needed auxiliary files. + :type auxiliary_path: str + + :param tstep_num: Number of time steps. + :type tstep_num: int + + :param vertical_description_path: Path to the file that describes the vertical resolution + :type vertical_description_path: str + + :param lat_1: Value of the Lat1 for the LCC grid type. + :type lat_1: float + + :param lat_2: Value of the Lat2 for the LCC grid type. + :type lat_2: float + + :param lon_0: Value of the Lon0 for the LCC grid type. + :type lon_0: float + + :param lat_0: Value of the Lat0 for the LCC grid type. + :type lat_0: float + + :param nx: Number of cells on the x dimension. + :type nx: int + + :param ny: Number of cells on the y dimension. + :type ny: int + + :param inc_x: Increment between x dimensions cell centroids (metres). 
+ :type inc_x: int + + :param inc_y: Increment between y dimensions cell centroids (metres). + :type inc_y: int + + :param x_0: Value of the X0 for the LCC grid type. + :type x_0: float + + :param y_0: Value of the Y0 for the LCC grid type. + :type y_0: float + + :param earth_radius: Radius of the Earth (metres). + Default = 6370000.000 + :type earth_radius: float + """ + spent_time = timeit.default_timer() + logger.write_log('Lambert Conformal Conic grid selected.') + self.grid_type = 'Lambert Conformal Conic' + + # UTM coordinates + self.x = None + self.y = None + + attributes = {'lat_1': lat_1, 'lat_2': lat_2, 'lon_0': lon_0, 'lat_0': lat_0, 'nx': nx, 'ny': ny, + 'inc_x': inc_x, 'inc_y': inc_y, 'x_0': x_0 + (inc_x / 2), 'y_0': y_0 + (inc_y / 2), + 'earth_radius': earth_radius, + 'crs': "+proj=lcc +lat_1={0} +lat_2={1} +lat_0={2} +lon_0={3} +x_0={4} +y_0={5} ".format( + lat_1, lat_2, lat_0, lon_0, 0, 0) + "+datum=WGS84 +units=m"} + + # Initialises with parent class + super(LccGrid, self).__init__(comm, logger, attributes, auxiliary_path, vertical_description_path) + self.shape = (tstep_num, len(self.vertical_desctiption), ny, nx) + self.logger.write_time_log('LccGrid', '__init__', timeit.default_timer() - spent_time) + + def write_netcdf(self): + """ + Write a lambert conformal conic grid NetCDF with empty data + """ + from hermesv3_bu.io_server.io_netcdf import write_coords_netcdf + spent_time = timeit.default_timer() + + if not os.path.exists(self.netcdf_path): + if not os.path.exists(os.path.dirname(self.netcdf_path)): + os.makedirs(os.path.dirname(self.netcdf_path)) + # Writes an auxiliary empty NetCDF only with the coordinates and an empty variable. + write_coords_netcdf(self.netcdf_path, self.center_latitudes, self.center_longitudes, + [{'name': 'var_aux', 'units': '', 'data': 0}], + boundary_latitudes=self.boundary_latitudes, + boundary_longitudes=self.boundary_longitudes, lcc=True, lcc_x=self.x, lcc_y=self.y, + lat_1_2="{0}, {1}".format(self.attributes['lat_1'], self.attributes['lat_2']), + lon_0=self.attributes['lon_0'], lat_0=self.attributes['lat_0']) + + self.logger.write_log("\tGrid created at '{0}'".format(self.netcdf_path), 3) + self.logger.write_time_log('LccGrid', 'write_netcdf', timeit.default_timer() - spent_time, 3) + return True + + def create_coords(self): + """ + Create the coordinates for a lambert conformal conic domain. 
+ """ + spent_time = timeit.default_timer() + # Create a regular grid in metres (Two 1D arrays) + self.x = np.linspace(self.attributes['x_0'], self.attributes['x_0'] + + (self.attributes['inc_x'] * (self.attributes['nx'] - 1)), self.attributes['nx'], + dtype=np.float) + self.y = np.linspace(self.attributes['y_0'], self.attributes['y_0'] + + (self.attributes['inc_y'] * (self.attributes['ny'] - 1)), self.attributes['ny'], + dtype=np.float) + + # 1D to 2D + x = np.array([self.x] * len(self.y)) + y = np.array([self.y] * len(self.x)).T + + # Create UTM bounds + y_b = self.create_bounds(y, self.attributes['inc_y'], number_vertices=4, inverse=True) + x_b = self.create_bounds(x, self.attributes['inc_x'], number_vertices=4) + + # Create the LCC projection + projection = Proj( + proj='lcc', + ellps='WGS84', + R=self.attributes['earth_radius'], + lat_1=self.attributes['lat_1'], + lat_2=self.attributes['lat_2'], + lon_0=self.attributes['lon_0'], + lat_0=self.attributes['lat_0'], + to_meter=1, + x_0=0, + y_0=0, + a=self.attributes['earth_radius'], + k_0=1.0) + + # UTM to LCC + self.center_longitudes, self.center_latitudes = projection(x, y, inverse=True) + self.boundary_longitudes, self.boundary_latitudes = projection(x_b, y_b, inverse=True) + + self.logger.write_time_log('LccGrid', 'create_coords', timeit.default_timer() - spent_time, 2) + return True diff --git a/hermesv3_bu/grids/grid_mercator.py b/hermesv3_bu/grids/grid_mercator.py new file mode 100755 index 0000000000000000000000000000000000000000..2c57d6536464423b42a83a2016e022747be1fd73 --- /dev/null +++ b/hermesv3_bu/grids/grid_mercator.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python + +import os +import timeit +import numpy as np +from pyproj import Proj +from grid import Grid +from hermesv3_bu.logger.log import Log + + +class MercatorGrid(Grid): + + def __init__(self, comm, logger, auxiliary_path, tstep_num, vertical_description_path, lat_ts, lon_0, nx, ny, inc_x, + inc_y, x_0, y_0, earth_radius=6370000.000): + """ + Mercator grid object that contains all the information to do a mercator output. + + :param logger: Logger. + :type logger: Log + + :param auxiliary_path: Path to the folder to store all the needed auxiliary files. + :type auxiliary_path: str + + :param tstep_num: Number of time steps. + :type tstep_num: int + + :param vertical_description_path: Path to the file that describes the vertical resolution + :type vertical_description_path: str + + :param lon_0: Value of the Lon0 for the LCC grid type. + :type lon_0: float + + :param nx: Number of cells on the x dimension. + :type nx: int + + :param ny: Number of cells on the y dimension. + :type ny: int + + :param inc_x: Increment between x dimensions cell centroids (metres). + :type inc_x: int + + :param inc_y: Increment between y dimensions cell centroids (metres). + :type inc_y: int + + :param x_0: Value of the X0 for the LCC grid type. + :type x_0: float + + :param y_0: Value of the Y0 for the LCC grid type. + :type y_0: float + + :param earth_radius: Radius of the Earth (metres). 
+ Default = 6370000.000 + :type earth_radius: float + """ + spent_time = timeit.default_timer() + + logger.write_log('Mercator grid selected.') + self.grid_type = 'Mercator' + attributes = {'lat_ts': lat_ts, 'lon_0': lon_0, 'nx': nx, 'ny': ny, 'inc_x': inc_x, 'inc_y': inc_y, + 'x_0': x_0 + (inc_x / 2), 'y_0': y_0 + (inc_y / 2), 'earth_radius': earth_radius, + 'crs': "+proj=merc +a={2} +b={2} +lat_ts={0} +lon_0={1}".format( + lat_ts, lon_0, earth_radius)} + + # UTM coordinates + self.x = None + self.y = None + + # Initialises with parent class + super(MercatorGrid, self).__init__(comm, logger, attributes, auxiliary_path, vertical_description_path) + + self.shape = (tstep_num, len(self.vertical_desctiption), ny, nx) + self.logger.write_time_log('MercatorGrid', '__init__', timeit.default_timer() - spent_time, 3) + + def write_netcdf(self): + """ + Write a mercator grid NetCDF with empty data + """ + from hermesv3_bu.io_server.io_netcdf import write_coords_netcdf + spent_time = timeit.default_timer() + if not os.path.exists(self.netcdf_path): + if not os.path.exists(os.path.dirname(self.netcdf_path)): + os.makedirs(os.path.dirname(self.netcdf_path)) + + # Writes an auxiliary empty NetCDF only with the coordinates and an empty variable. + write_coords_netcdf(self.netcdf_path, self.center_latitudes, self.center_longitudes, + [{'name': 'var_aux', 'units': '', 'data': 0}], + boundary_latitudes=self.boundary_latitudes, + boundary_longitudes=self.boundary_longitudes, + mercator=True, lcc_x=self.x, lcc_y=self.y, lon_0=self.attributes['lon_0'], + lat_ts=self.attributes['lat_ts']) + self.logger.write_log("\tGrid created at '{0}'".format(self.netcdf_path), 3) + self.logger.write_time_log('MercatorGrid', 'write_netcdf', timeit.default_timer() - spent_time, 3) + return True + + def create_coords(self): + """ + Create the coordinates for a mercator domain. 
+ """ + spent_time = timeit.default_timer() + # Create a regular grid in metres (Two 1D arrays) + self.x = np.linspace(self.attributes['x_0'], self.attributes['x_0'] + + (self.attributes['inc_x'] * (self.attributes['nx'] - 1)), self.attributes['nx'], + dtype=np.float) + self.y = np.arange(self.attributes['y_0'], self.attributes['y_0'] + + (self.attributes['inc_y'] * (self.attributes['ny'] - 1)), self.attributes['ny'], + dtype=np.float) + + # 1D to 2D + x = np.array([self.x] * len(self.y)) + y = np.array([self.y] * len(self.x)).T + + # Create UTM bounds + y_b = self.create_bounds(y, self.attributes['inc_y'], number_vertices=4, inverse=True) + x_b = self.create_bounds(x, self.attributes['inc_x'], number_vertices=4) + + # Create the LCC projection + projection = Proj(self.attributes['crs']) + + # UTM to Mercator + self.center_longitudes, self.center_latitudes = projection(x, y, inverse=True) + self.boundary_longitudes, self.boundary_latitudes = projection(x_b, y_b, inverse=True) + + self.logger.write_time_log('MercatorGrid', 'create_coords', timeit.default_timer() - spent_time, 3) + + return True diff --git a/hermesv3_bu/grids/grid_rotated.py b/hermesv3_bu/grids/grid_rotated.py new file mode 100755 index 0000000000000000000000000000000000000000..3ddf526237621acfafdb02d31ed345ae8a417cfa --- /dev/null +++ b/hermesv3_bu/grids/grid_rotated.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python + +import os +import timeit +from grid import Grid +import numpy as np +import math + +from hermesv3_bu.logger.log import Log + + +class RotatedGrid(Grid): + def __init__(self, comm, logger, auxiliary_path, tstep_num, vertical_description_path, centre_lat, centre_lon, + west_boundary, south_boundary, inc_rlat, inc_rlon): + """ + + :param logger: Logger. + :type logger: Log + + :param auxiliary_path: + :param tstep_num: + :param vertical_description_path: + :param centre_lat: + :param centre_lon: + :param west_boundary: + :param south_boundary: + :param inc_rlat: + :param inc_rlon: + """ + spent_time = timeit.default_timer() + + self.rlat = None + self.rlon = None + + logger.write_log('Rotated grid selected.') + self.grid_type = 'Rotated' + attributes = {'new_pole_longitude_degrees': -180 + centre_lon, 'new_pole_latitude_degrees': centre_lat, + 'centre_lat': centre_lat, 'centre_lon': centre_lon, 'west_boundary': west_boundary, + 'south_boundary': south_boundary, 'inc_rlat': inc_rlat, 'inc_rlon': inc_rlon, + 'n_lat': int((abs(south_boundary) / inc_rlat) * 2 + 1), + 'n_lon': int((abs(west_boundary) / inc_rlon) * 2 + 1), 'crs': {'init': 'epsg:4326'}} + + # Initialises with parent class + super(RotatedGrid, self).__init__(comm, logger, attributes, auxiliary_path, vertical_description_path) + + self.shape = (tstep_num, len(self.vertical_desctiption), len(self.rlat), len(self.rlon)) + self.logger.write_time_log('RotatedGrid', '__init__', timeit.default_timer() - spent_time, 3) + + def create_regular_rotated(self): + """ + Create a regular grid on the rotated domain. 
+ + :return: center_latitudes, center_longitudes, corner_latitudes, corner_longitudes + :rtype: tuple + """ + spent_time = timeit.default_timer() + + center_latitudes = np.linspace(self.attributes['south_boundary'], self.attributes['south_boundary'] + + (self.attributes['inc_rlat'] * (self.attributes['n_lat'] - 1)), + self.attributes['n_lat'], dtype=np.float) + center_longitudes = np.linspace(self.attributes['west_boundary'], self.attributes['west_boundary'] + + (self.attributes['inc_rlon'] * (self.attributes['n_lon'] - 1)), + self.attributes['n_lon'], dtype=np.float) + + corner_latitudes = self.create_bounds(center_latitudes, self.attributes['inc_rlat'], number_vertices=4, + inverse=True) + corner_longitudes = self.create_bounds(center_longitudes, self.attributes['inc_rlon'], number_vertices=4) + + self.logger.write_time_log('RotatedGrid', 'create_regular_rotated', timeit.default_timer() - spent_time, 3) + return center_latitudes, center_longitudes, corner_latitudes, corner_longitudes + + def create_coords(self): + """ + Create the coordinates for a rotated domain. + """ + spent_time = timeit.default_timer() + # Create rotated coordinates + (self.rlat, self.rlon, br_lats_single, br_lons_single) = self.create_regular_rotated() + + # 1D to 2D + c_lats = np.array([self.rlat] * len(self.rlon)).T + c_lons = np.array([self.rlon] * len(self.rlat)) + + # Create rotated boundary coordinates + b_lats = super(RotatedGrid, self).create_bounds(c_lats, self.attributes['inc_rlat'], number_vertices=4, + inverse=True) + b_lons = super(RotatedGrid, self).create_bounds(c_lons, self.attributes['inc_rlon'], number_vertices=4) + + # Rotated to Lat-Lon + self.boundary_longitudes, self.boundary_latitudes = self.rotated2latlon(b_lons, b_lats) + self.center_longitudes, self.center_latitudes = self.rotated2latlon(c_lons, c_lats) + + self.logger.write_time_log('RotatedGrid', 'create_coords', timeit.default_timer() - spent_time, 3) + return True + + def rotated2latlon(self, lon_deg, lat_deg, lon_min=-180): + """ + Calculate the unrotated coordinates using the rotated ones. + + :param lon_deg: Rotated longitude coordinate. + :type lon_deg: numpy.array + + :param lat_deg: Rotated latitude coordinate. + :type lat_deg: numpy.array + + :param lon_min: Minimum value for the longitudes: -180 (-180 to 180) or 0 (0 to 360) + :type lon_min: float + + :return: Unrotated coordinates. Longitudes, Latitudes + :rtype: tuple(numpy.array, numpy.array) + """ + spent_time = timeit.default_timer() + degrees_to_radians = math.pi / 180. + # radians_to_degrees = 180. / math.pi + + # Positive east to negative east + # self.new_pole_longitude_degrees -= 180 + + tph0 = self.attributes['new_pole_latitude_degrees'] * degrees_to_radians + tlm = lon_deg * degrees_to_radians + tph = lat_deg * degrees_to_radians + tlm0d = self.attributes['new_pole_longitude_degrees'] + ctph0 = np.cos(tph0) + stph0 = np.sin(tph0) + + stlm = np.sin(tlm) + ctlm = np.cos(tlm) + stph = np.sin(tph) + ctph = np.cos(tph) + + # Latitude + sph = (ctph0 * stph) + (stph0 * ctph * ctlm) + # if sph > 1.: + # sph = 1. + # if sph < -1.: + # sph = -1. + # print type(sph) + sph[sph > 1.] = 1. + sph[sph < -1.] = -1. 
+ + aph = np.arcsin(sph) + aphd = aph / degrees_to_radians + + # Longitude + anum = ctph * stlm + denom = (ctlm * ctph - stph0 * sph) / ctph0 + relm = np.arctan2(anum, denom) - math.pi + almd = relm / degrees_to_radians + tlm0d + + # if almd < min_lon: + # almd += 360 + # elif almd > max_lon: + # almd -= 360 + almd[almd > (lon_min + 360)] -= 360 + almd[almd < lon_min] += 360 + + self.logger.write_time_log('RotatedGrid', 'rotated2latlon', timeit.default_timer() - spent_time, 3) + + return almd, aphd + + def write_netcdf(self): + """ + Write a rotated grid NetCDF with empty data + """ + from hermesv3_bu.io_server.io_netcdf import write_coords_netcdf + spent_time = timeit.default_timer() + if not os.path.exists(self.netcdf_path): + if not os.path.exists(os.path.dirname(self.netcdf_path)): + os.makedirs(os.path.dirname(self.netcdf_path)) + # Writes an auxiliary empty NetCDF only with the coordinates and an empty variable. + write_coords_netcdf(self.netcdf_path, self.center_latitudes, self.center_longitudes, + [{'name': 'var_aux', 'units': '', 'data': 0}], + boundary_latitudes=self.boundary_latitudes, + boundary_longitudes=self.boundary_longitudes, + rotated=True, rotated_lats=self.rlat, rotated_lons=self.rlon, + north_pole_lat=90 - self.attributes['new_pole_latitude_degrees'], + north_pole_lon=self.attributes['new_pole_longitude_degrees']) + self.logger.write_log("\tGrid created at '{0}'".format(self.netcdf_path), 3) + self.logger.write_time_log('RotatedGrid', 'write_netcdf', timeit.default_timer() - spent_time, 3) + return True diff --git a/hermesv3_bu/hermes.py b/hermesv3_bu/hermes.py index f73e83c1662f771d1347f577220933e0a59d3eaa..8a8015ccccfd32e5c69b1064c41bf679d5ce6ada 100755 --- a/hermesv3_bu/hermes.py +++ b/hermesv3_bu/hermes.py @@ -1,270 +1,76 @@ #!/usr/bin/env python -""" -Copyright 2018 Earth Sciences Department, BSC-CNS - - This file is part of HERMESv3. - - HERMESv3 is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - HERMESv3 is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with HERMESv3. If not, see . 
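The rotated-to-geographic conversion above is plain numpy trigonometry, so it can be checked outside the class. The sketch below re-derives the same formulas as a standalone function and verifies that the rotated origin maps back to the domain centre; the function name and test values are illustrative.

    import numpy as np

    def rotated_to_latlon(rlon_deg, rlat_deg, centre_lat, centre_lon):
        # Same maths as RotatedGrid.rotated2latlon, for scalar or array inputs.
        d2r = np.pi / 180.0
        pole_lat = centre_lat                  # new_pole_latitude_degrees
        pole_lon = -180.0 + centre_lon         # new_pole_longitude_degrees
        tph0 = pole_lat * d2r
        tlm, tph = np.asarray(rlon_deg) * d2r, np.asarray(rlat_deg) * d2r
        # Clamp to the arcsin domain to absorb floating-point rounding.
        sph = np.clip(np.cos(tph0) * np.sin(tph) + np.sin(tph0) * np.cos(tph) * np.cos(tlm), -1.0, 1.0)
        lat = np.arcsin(sph) / d2r
        anum = np.cos(tph) * np.sin(tlm)
        denom = (np.cos(tlm) * np.cos(tph) - np.sin(tph0) * sph) / np.cos(tph0)
        lon = (np.arctan2(anum, denom) - np.pi) / d2r + pole_lon
        lon = np.where(lon < -180.0, lon + 360.0, np.where(lon > 180.0, lon - 360.0, lon))
        return lon, lat

    # The rotated origin maps back onto the geographic domain centre:
    # rotated_to_latlon(0.0, 0.0, centre_lat=40.0, centre_lon=20.0) -> (approx. 20.0, 40.0)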
-""" - -__author__ = "Carles Tena" -__copyright__ = "Copyright 2018" -__email__ = "carles.tena@bsc.es" -__license__ = "GNU General Public License" -__maintainer__ = "Carles Tena" -__version__ = "3.3.1" - -from memory_profiler import profile import sys import os +import timeit from mpi4py import MPI +from datetime import timedelta -parentPath = os.path.abspath(os.path.join('..', '..')) -if parentPath not in sys.path: - sys.path.insert(0, parentPath) - -from timeit import default_timer as gettime - -from hermesv3_bu.config import settings from hermesv3_bu.config.config import Config -from hermesv3_bu.modules.emision_inventories.emission_inventory import EmissionInventory -from hermesv3_bu.modules.vertical.vertical import VerticalDistribution -from hermesv3_bu.modules.temporal.temporal import TemporalDistribution -from hermesv3_bu.modules.bottomup.traffic.traffic import Traffic -from hermesv3_bu.modules.writing.writing_cmaq import WritingCmaq -from hermesv3_bu.modules.writing.writing import Writing -from hermesv3_bu.tools.netcdf_tools import * -from hermesv3_bu.modules.bottomup.point_source.point_source import PointSource -# import pyextrae.sequential as pyextrae +from hermesv3_bu.grids.grid import select_grid +from hermesv3_bu.clipping.clip import select_clip +from hermesv3_bu.writer.writer import select_writer +from hermesv3_bu.sectors.sector_manager import SectorManager +from hermesv3_bu.logger.log import Log class Hermes(object): """ Interface class for HERMESv3. """ - def __init__(self, config, new_date=None): - from hermesv3_bu.modules.grids.grid import Grid - from hermesv3_bu.modules.temporal.temporal import TemporalDistribution + def __init__(self, config): + self.initial_time = timeit.default_timer() + self.comm = MPI.COMM_WORLD - st_time = gettime() + self.arguments = config.arguments + self.logger = Log(self.comm, self.arguments) + self.logger.write_log('====== Starting HERMESv3_BU simulation =====') + self.grid = select_grid(self.comm, self.logger, self.arguments) + self.clip = select_clip(self.comm, self.logger, self.arguments.auxiliary_files_path, self.arguments.clipping, + self.grid) + self.date_array = [self.arguments.start_date + timedelta(hours=hour) for hour in + xrange(self.arguments.output_timestep_num)] + self.logger.write_log('Dates to simulate: {0}'.format( + [aux_date.strftime("%Y/%m/%d, %H:%M:%S") for aux_date in self.date_array]), message_level=2) - self.config = config - self.options = config.options + self.sector_manager = SectorManager( + self.comm, self.logger, self.grid, self.clip, self.date_array, self.arguments) - # updating starting date - if new_date is not None: - self.options.start_date = new_date + self.writer = select_writer(self.logger, self.arguments, self.grid, self.date_array) - config.set_log_level() + self.logger.write_time_log('Hermes', '__init__', timeit.default_timer() - self.initial_time) - self.grid = Grid.select_grid(self.options.domain_type, self.options.vertical_description, self.options.output_timestep_num, self.options.auxiliar_files_path, self.options.inc_lat, - self.options.inc_lon, self.options.centre_lat, self.options.centre_lon, - self.options.west_boundary, self.options.south_boundary, self.options.inc_rlat, - self.options.inc_rlon, - self.options.lat_1, self.options.lat_2, self.options.lon_0, self.options.lat_0, - self.options.nx, self.options.ny, self.options.inc_x, self.options.inc_y, - self.options.x_0, self.options.y_0) - if not self.options.do_bottomup: - self.emission_list = EmissionInventory.make_emission_list(self.options, 
self.grid, self.options.start_date) - else: - if self.options.do_traffic: - self.traffic = Traffic(self.options.auxiliar_files_path, self.options.clipping, - self.options.road_link_path, self.options.fleet_compo_path, - self.options.speed_hourly_path, self.options.traffic_monthly_profiles, - self.options.traffic_daily_profiles, self.options.traffic_hourly_profiles_mean, - self.options.traffic_hourly_profiles_weekday, - self.options.traffic_hourly_profiles_saturday, - self.options.traffic_hourly_profiles_sunday, self.options.ef_path, - self.options.traffic_pollutants, self.options.start_date, self.grid, - vehicle_list=self.options.vehicle_types, - load=self.options.load, - timestep_type=self.options.output_timestep_type, - timestep_num=self.options.output_timestep_num, - timestep_freq=self.options.output_timestep_freq, - speciation_map=self.options.traffic_speciation_map, - hot_cold_speciation=self.options.traffic_speciation_profile_hot_cold, - tyre_speciation=self.options.traffic_speciation_profile_tyre, - road_speciation=self.options.traffic_speciation_profile_road, - brake_speciation=self.options.traffic_speciation_profile_brake, - resuspension_speciation=self.options.traffic_speciation_profile_resuspension, - - temp_common_path=self.options.temperature_files_path, - output_type=self.options.output_type, output_dir=self.options.output_dir, - molecular_weights_path=self.options.molecular_weights,) - if self.options.do_point_sources: - self.poin_source = PointSource( - self.grid, self.options.point_source_catalog, self.options.point_source_monthly_profiles, - self.options.point_source_daily_profiles, self.options.point_source_hourly_profiles, - self.options.point_source_speciation_map, self.options.point_source_speciation_profiles, - self.options.point_source_snaps, self.options.effective_stack_height, - self.options.point_source_pollutants, self.options.point_source_measured_emissions, - molecular_weights_path=self.options.molecular_weights) - - self.delta_hours = TemporalDistribution.calculate_delta_hours(self.options.start_date, - self.options.output_timestep_type, - self.options.output_timestep_num, - self.options.output_timestep_freq) - self.levels = VerticalDistribution.get_vertical_output_profile(self.options.vertical_description) - - print 'TIME -> HERMES.__init__: Rank {0} {1} s'.format(settings.rank, round(gettime() - st_time, 2)) - - # @profile def main(self): """ Main functionality of the model. 
""" - from multiprocessing import Process, Queue, cpu_count - from threading import Thread - import copy - import gc - import numpy as np from datetime import timedelta - from cf_units import Unit - - if settings.log_level_1: - print '====================================================' - print '==================== HERMESv3.0 ====================' - print '====================================================' - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - # date_aux = self.options.start_date - # while date_aux <= self.options.end_date: - if settings.log_level_1: - print '\n\t================================================' - print '\t\t STARTING emissions for {0}'.format(self.options.start_date.strftime('%Y/%m/%d %H:%M:%S')) - print '\t================================================' - st_time_1 = gettime() - else: - st_time_1 = None - if not self.options.do_bottomup: - for ei in self.emission_list: - ei.do_regrid() - if ei.vertical is not None: - vf_time = gettime() - ei.vertical_factors = ei.vertical.calculate_weights() - print "TIME -> Vertical_factors: {0} Rank {1} {2} s\n".format("{0}_{1}".format(ei.inventory_name, ei.sector), settings.rank, round(gettime() - vf_time, 4)) - if ei.temporal is not None: - tf_time = gettime() - ei.temporal_factors = ei.temporal.calculate_3d_temporal_factors() - print "TIME -> Temporal_factors: {0} Rank {1} {2} s\n".format("{0}_{1}".format(ei.inventory_name, ei.sector), settings.rank, round(gettime() - tf_time, 4)) - if ei.speciation is not None: - sp_time = gettime() - ei.emissions = ei.speciation.do_speciation(ei.emissions, self.grid.cell_area) - print "TIME -> Speciation: {0} Rank {1} {2} s\n".format("{0}_{1}".format(ei.inventory_name, ei.sector), settings.rank, round(gettime() - sp_time, 4)) - else: - if self.options.do_traffic: - e = self.traffic.calculate_traffic_line_emissions( - do_hot=self.options.do_hot, do_cold=self.options.do_cold, do_tyre_wear=self.options.do_tyre_wear, - do_brake_wear=self.options.do_brake_wear, do_road_wear=self.options.do_road_wear, - do_resuspension=self.options.do_resuspension, do_evaporative=self.options.do_evaporative, - do_other_cities=self.options.do_other_cities) + emis = self.sector_manager.run() + waiting_time = timeit.default_timer() + self.comm.Barrier() + self.logger.write_log('All emissions calculated!') + self.logger.write_time_log('Hermes', 'Waiting_to_write', timeit.default_timer() - waiting_time) - if self.options.output_type == 'R-LINE': - self.traffic.write_rline(e, self.options.output_dir, self.options.start_date) + self.writer.write(emis) + self.comm.Barrier() - if settings.log_level_1: - print '\t==========================================' - print '\t\t TIME {0} -> {1}'.format(self.options.start_date.strftime('%Y/%m/%d %H:%M:%S'), - round(gettime() - st_time_1, 2)) - print '\t==========================================' + self.logger.write_log('***** HERMES simulation finished succesful *****') + self.logger.write_time_log('Hermes', 'TOTAL', timeit.default_timer() - self.initial_time) + self.logger.finish_logs() - if self.options.start_date < self.options.end_date: - return self.options.start_date + timedelta(days=1) - return None - - self.emission_list = self.traffic.links_to_grid(e, self.grid.to_shapefile()) - - if self.options.output_type == 'MONARCH': - pass - # TODO divide by cell/area - if self.options.do_point_sources: - e = self.poin_source.calculate_point_source_emissions( - self.options.start_date, self.delta_hours, self.levels) - self.emission_list = 
self.poin_source.points_to_grid( - e, self.grid.to_shapefile(), self.poin_source.speciation_map['dst'].values) - - writing_time = gettime() - - if self.options.output_type == 'CMAQ': - writer = WritingCmaq - elif self.options.output_type == 'MONARCH': - writer = Writing - - if self.options.do_bottomup: - if settings.rank == 0: - writer.write_netcdf(self.config.get_output_name(self.options.start_date), self.grid, - self.emission_list, - levels=VerticalDistribution.get_vertical_output_profile( - self.options.vertical_description), - date=self.options.start_date, hours=self.delta_hours, - point_source=self.options.do_point_sources) - - else: - empty_dict = {} - for ei in self.emission_list: - for emi in ei.emissions: - if not emi['name'] in empty_dict: - dict_aux = emi.copy() - dict_aux['data'] = None - empty_dict[emi['name']] = dict_aux - - if settings.writing_serial: - writer.write_serial_netcdf(self.config.get_output_name(self.options.start_date), self.grid, empty_dict.values(), - self.emission_list, - levels=VerticalDistribution.get_vertical_output_profile( - self.options.vertical_description), - date=self.options.start_date, hours=self.delta_hours) - else: - if settings.rank == 0: - print "TIME -> empty_list: {0} s\n".format(round(gettime() - writing_time, 2)) - writer.create_parallel_netcdf(self.config.get_output_name(self.options.start_date), self.grid, empty_dict.values(), - levels=VerticalDistribution.get_vertical_output_profile( - self.options.vertical_description), - date=self.options.start_date, hours=self.delta_hours) - print 'NETCDF CREATED. Starting to write' - settings.comm.Barrier() - if settings.rank == 0: - print 'Starting to write' - writer.write_parallel_netcdf(self.config.get_output_name(self.options.start_date), self.grid, empty_dict.keys(), - self.emission_list) - - print "TIME -> Writing Rank {0} {1} s\n".format(settings.rank, round(gettime() - writing_time, 2)) - settings.comm.Barrier() - if settings.log_level_2: - print "TIME -> TOTAL Writing: {0} s\n".format(round(gettime() - writing_time, 2)) - if settings.log_level_1: - print '\t==========================================' - print '\t\t TIME {0} -> {1}'.format(self.options.start_date.strftime('%Y/%m/%d %H:%M:%S'), - round(gettime() - st_time_1, 2)) - print '\t==========================================' - - if settings.log_level_3: - print 'TIME -> HERMES.main: {0} s\n'.format(round(gettime() - st_time, 2)) - - if self.options.start_date < self.options.end_date: - return self.options.start_date + timedelta(days=1) + if self.arguments.start_date < self.arguments.end_date: + return self.arguments.start_date + timedelta(days=1) return None -if __name__ == '__main__': +def run(): date = Hermes(Config()).main() while date is not None: - date = Hermes(Config(), new_date=date).main() + date = Hermes(Config(new_date=date)).main() sys.exit(0) + + +if __name__ == '__main__': + run() diff --git a/hermesv3_bu/modules/masking/__init__.py b/hermesv3_bu/io_server/__init__.py old mode 100644 new mode 100755 similarity index 100% rename from hermesv3_bu/modules/masking/__init__.py rename to hermesv3_bu/io_server/__init__.py diff --git a/hermesv3_bu/io_server/io_netcdf.py b/hermesv3_bu/io_server/io_netcdf.py new file mode 100755 index 0000000000000000000000000000000000000000..ad47f657bdd50119963e6acad5956f548782904d --- /dev/null +++ b/hermesv3_bu/io_server/io_netcdf.py @@ -0,0 +1,442 @@ +#!/usr/bin/env python + +import sys +import os +from mpi4py import MPI +from datetime import timedelta +from hermesv3_bu.io_server.io_server 
import IoServer +import numpy as np +import geopandas as gpd +from netCDF4 import Dataset +from shapely.geometry import Point +from cf_units import num2date, CALENDAR_STANDARD + +from geopandas import GeoDataFrame + + +class IoNetcdf(IoServer): + def __init__(self, comm): + if comm is None: + comm = MPI.COMM_WORLD + super(IoNetcdf, self).__init__(comm) + + def get_data_from_netcdf(self, netcdf_path, var_name, date_type, date, geometry_df): + """ + Read for extract a NetCDF variable in the desired points. + + :param netcdf_path: Path to the NetCDF that contains the data to extract. + :type netcdf_path: str + + :param var_name: Name of the NetCDF variable to extract. + :type var_name: str + + :param date_type: Option to set if we want to extract a 'daily' variable or a 'yearly' one. + :type date_type: str + + :param date: Date of the day to extract. + :type date: datetime.date + + :param geometry_df: GeoDataframe with the point where extract the variables. + :type geometry_df: geopandas.GeoDataframe + + :return: GeoDataframe with the data in the desired points. + :rtype: geopandas.GeoDataframe + """ + nc = Dataset(netcdf_path, mode='r') + lat_o = nc.variables['latitude'][:] + lon_o = nc.variables['longitude'][:] + + if date_type == 'daily': + time = nc.variables['time'] + # From time array to list of dates. + time_array = num2date(time[:], time.units, CALENDAR_STANDARD) + time_array = np.array([aux.date() for aux in time_array]) + i_time = np.where(time_array == date)[0][0] + elif date_type == 'yearly': + i_time = 0 + + # Find the index to read all the necessary information but avoiding to read as many unused data as we can + i_min, i_max, j_min, j_max = self.find_lonlat_index( + lon_o, lat_o, geometry_df['c_lon'].min(), geometry_df['c_lon'].max(), + geometry_df['c_lat'].min(), geometry_df['c_lat'].max()) + + # Clips the lat lons + lon_o = lon_o[i_min:i_max] + lat_o = lat_o[j_min:j_max] + + # From 1D to 2D + lat = np.array([lat_o[:]] * len(lon_o[:])).T.flatten() + lon = np.array([lon_o[:]] * len(lat_o[:])).flatten() + del lat_o, lon_o + + # Reads the tas variable of the xone and the times needed. + var = nc.variables[var_name][i_time, j_min:j_max, i_min:i_max] + nc.close() + + var_df = gpd.GeoDataFrame(var.flatten().T, columns=[var_name], crs={'init': 'epsg:4326'}, + geometry=[Point(xy) for xy in zip(lon, lat)]) + var_df.loc[:, 'REC'] = var_df.index + + return var_df + + def get_hourly_data_from_netcdf(self, lon_min, lon_max, lat_min, lat_max, netcdf_dir, var_name, date_array): + """ + Reads the temperature from the ERA5 var value. + It will return only the involved cells of the NetCDF in DataFrame format. + + To clip the global NetCDF to the desired region it is needed the minimum and maximum value of the latitudes and + longitudes of the centroids of all the road links. + + :param lon_min: Minimum longitude of the centroid of the road links. + :type lon_min: float + + :param lon_max: Maximum longitude of the centroid of the road links. + :type lon_max: float + + :param lat_min: Minimum latitude of the centroid of the road links. + :type lat_min: float + + :param lat_max: Maximum latitude of the centroid of the road links. + :type lat_max: float + + :return: Temperature, centroid of the cell and cell identificator (REC). + Each time step is each column with the name t_. 
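The daily lookup in get_data_from_netcdf reduces to decoding the time axis with cf_units and matching the requested calendar day; in isolation that step looks like this, where the file and variable names are assumptions.

    import numpy as np
    from datetime import date
    from netCDF4 import Dataset
    from cf_units import num2date, CALENDAR_STANDARD

    target_date = date(2019, 1, 15)                      # day to extract (illustrative)
    nc = Dataset('meteo_201901.nc', mode='r')            # hypothetical daily-resolution file
    time = nc.variables['time']
    days = np.array([t.date() for t in num2date(time[:], time.units, CALENDAR_STANDARD)])
    i_time = np.where(days == target_date)[0][0]         # index of the requested day
    field = nc.variables['t2m'][i_time, :, :]            # hypothetical variable name
    nc.close()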
+ :rtype: GeoDataFrame + """ + path = os.path.join(netcdf_dir, '{0}_{1}{2}.nc'.format(var_name, date_array[0].year, + str(date_array[0].month).zfill(2))) + # self.logger.write_log('Getting temperature from {0}'.format(path), message_level=2) + + nc = Dataset(path, mode='r') + lat_o = nc.variables['latitude'][:] + lon_o = nc.variables['longitude'][:] + time = nc.variables['time'] + # From time array to list of dates. + time_array = num2date(time[:], time.units, CALENDAR_STANDARD) + i_time = np.where(time_array == date_array[0])[0][0] + + # Correction to set the longitudes from -180 to 180 instead of from 0 to 360. + if lon_o.max() > 180: + lon_o[lon_o > 180] -= 360 + + # Finds the array positions for the clip. + i_min, i_max, j_min, j_max = self.find_lonlat_index(lon_o, lat_o, lon_min, lon_max, lat_min, lat_max) + + # Clips the lat lons + lon_o = lon_o[i_min:i_max] + lat_o = lat_o[j_min:j_max] + + # From 1D to 2D + lat = np.array([lat_o[:]] * len(lon_o[:])).T.flatten() + lon = np.array([lon_o[:]] * len(lat_o[:])).flatten() + del lat_o, lon_o + + # Reads the var variable of the xone and the times needed. + var = nc.variables[var_name][i_time:i_time + (len(date_array)), j_min:j_max, i_min:i_max] + + nc.close() + # That condition is fot the cases that the needed temperature is in a different NetCDF. + while len(var) < len(date_array): + aux_date = date_array[len(var) + 1] + path = os.path.join(netcdf_dir, '{0}_{1}{2}.nc'.format(var_name, aux_date.year, + str(aux_date.month).zfill(2))) + # self.logger.write_log('Getting {0} from {1}'.format(var_name, path), message_level=2) + nc = Dataset(path, mode='r') + i_time = 0 + new_var = nc.variables[var_name][i_time:i_time + (len(date_array) - len(var)), j_min:j_max, i_min:i_max] + + var = np.concatenate([var, new_var]) + + nc.close() + + var = var.reshape((var.shape[0], var.shape[1] * var.shape[2])) + df = gpd.GeoDataFrame(var.T, geometry=[Point(xy) for xy in zip(lon, lat)]) + # df.columns = ['t_{0}'.format(x) for x in df.columns.values[:-1]] + ['geometry'] + df.loc[:, 'REC'] = df.index + + return df + + @staticmethod + def find_lonlat_index(lon, lat, lon_min, lon_max, lat_min, lat_max): + """ + Find the NetCDF index to extract all the data avoiding the maximum of unused data. + + :param lon: Longitudes array from the NetCDF. + :type lon: numpy.array + + :param lat: Latitude array from the NetCDF. + :type lat: numpy.array + + :param lon_min: Minimum longitude of the point for the needed date. + :type lon_min float + + :param lon_max: Maximum longitude of the point for the needed date. + :type lon_max: float + + :param lat_min: Minimum latitude of the point for the needed date. + :type lat_min: float + + :param lat_max: Maximum latitude of the point for the needed date. 
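The hourly reader expects one file per variable and month named <var>_<YYYY><MM>.nc; resolving the first file of a run is just the following, where the directory is an assumption.

    import os
    from datetime import datetime

    netcdf_dir = '/data/meteo/era5'                      # hypothetical directory
    var_name = 'tas'                                     # variable prefix, as used above
    first_date = datetime(2019, 1, 1, 0)

    path = os.path.join(netcdf_dir, '{0}_{1}{2}.nc'.format(
        var_name, first_date.year, str(first_date.month).zfill(2)))
    # -> '/data/meteo/era5/tas_201901.nc'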
+ :type lat_max: float + + :return: Tuple with the four index of the NetCDF + :rtype: tuple + """ + import numpy as np + + aux = lon - lon_min + aux[aux > 0] = np.nan + i_min = np.where(aux == np.nanmax(aux))[0][0] + + aux = lon - lon_max + aux[aux < 0] = np.nan + i_max = np.where(aux == np.nanmin(aux))[0][0] + + aux = lat - lat_min + aux[aux > 0] = np.nan + j_max = np.where(aux == np.nanmax(aux))[0][0] + + aux = lat - lat_max + aux[aux < 0] = np.nan + j_min = np.where(aux == np.nanmin(aux))[0][0] + + return i_min, i_max + 1, j_min, j_max + 1 + + +def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_list, levels=None, date=None, hours=None, + boundary_latitudes=None, boundary_longitudes=None, cell_area=None, global_attributes=None, + regular_latlon=False, + rotated=False, rotated_lats=None, rotated_lons=None, north_pole_lat=None, north_pole_lon=None, + lcc=False, lcc_x=None, lcc_y=None, lat_1_2=None, lon_0=None, lat_0=None, + mercator=False, lat_ts=None): + + from netCDF4 import Dataset + from cf_units import Unit, encode_time + + if not (regular_latlon or lcc or rotated or mercator): + regular_latlon = True + netcdf = Dataset(netcdf_path, mode='w', format="NETCDF4") + + # ===== Dimensions ===== + if regular_latlon: + var_dim = ('lat', 'lon',) + + # Latitude + if len(center_latitudes.shape) == 1: + netcdf.createDimension('lat', center_latitudes.shape[0]) + lat_dim = ('lat',) + elif len(center_latitudes.shape) == 2: + netcdf.createDimension('lat', center_latitudes.shape[0]) + lat_dim = ('lon', 'lat', ) + else: + print 'ERROR: Latitudes must be on a 1D or 2D array instead of {0}'.format(len(center_latitudes.shape)) + sys.exit(1) + + # Longitude + if len(center_longitudes.shape) == 1: + netcdf.createDimension('lon', center_longitudes.shape[0]) + lon_dim = ('lon',) + elif len(center_longitudes.shape) == 2: + netcdf.createDimension('lon', center_longitudes.shape[1]) + lon_dim = ('lon', 'lat', ) + else: + print 'ERROR: Longitudes must be on a 1D or 2D array instead of {0}'.format(len(center_longitudes.shape)) + sys.exit(1) + elif rotated: + var_dim = ('rlat', 'rlon',) + + # Rotated Latitude + if rotated_lats is None: + print 'ERROR: For rotated grids is needed the rotated latitudes.' + sys.exit(1) + netcdf.createDimension('rlat', len(rotated_lats)) + lat_dim = ('rlat', 'rlon',) + + # Rotated Longitude + if rotated_lons is None: + print 'ERROR: For rotated grids is needed the rotated longitudes.' + sys.exit(1) + netcdf.createDimension('rlon', len(rotated_lons)) + lon_dim = ('rlat', 'rlon',) + elif lcc or mercator: + var_dim = ('y', 'x',) + + netcdf.createDimension('y', len(lcc_y)) + lat_dim = ('y', 'x', ) + + netcdf.createDimension('x', len(lcc_x)) + lon_dim = ('y', 'x', ) + else: + lat_dim = None + lon_dim = None + var_dim = None + + # Levels + if levels is not None: + netcdf.createDimension('lev', len(levels)) + + # Bounds + if boundary_latitudes is not None: + try: + netcdf.createDimension('nv', len(boundary_latitudes[0, 0])) + except TypeError: + netcdf.createDimension('nv', boundary_latitudes.shape[1]) + + # Time + netcdf.createDimension('time', None) + + # ===== Variables ===== + # Time + if date is None: + time = netcdf.createVariable('time', 'd', ('time',), zlib=True) + time.units = "months since 2000-01-01 00:00:00" + time.standard_name = "time" + time.calendar = "gregorian" + time.long_name = "time" + time[:] = [0.] 
+ else: + time = netcdf.createVariable('time', 'd', ('time',), zlib=True) + u = Unit('hours') + # print u.offset_by_time(encode_time(date.year, date.month, date.day, date.hour, date.minute, date.second)) + # Unit('hour since 1970-01-01 00:00:00.0000000 UTC') + time.units = str(u.offset_by_time(encode_time(date.year, date.month, date.day, date.hour, date.minute, + date.second))) + time.standard_name = "time" + time.calendar = "gregorian" + time.long_name = "time" + time[:] = hours + + # Latitude + lats = netcdf.createVariable('lat', 'f', lat_dim, zlib=True) + lats.units = "degrees_north" + lats.axis = "Y" + lats.long_name = "latitude coordinate" + lats.standard_name = "latitude" + lats[:] = center_latitudes + + if boundary_latitudes is not None: + lats.bounds = "lat_bnds" + lat_bnds = netcdf.createVariable('lat_bnds', 'f', lat_dim + ('nv',), zlib=True) + # print lat_bnds[:].shape, boundary_latitudes.shape + lat_bnds[:] = boundary_latitudes + + # Longitude + lons = netcdf.createVariable('lon', 'f', lon_dim, zlib=True) + + lons.units = "degrees_east" + lons.axis = "X" + lons.long_name = "longitude coordinate" + lons.standard_name = "longitude" + # print 'lons:', lons[:].shape, center_longitudes.shape + lons[:] = center_longitudes + if boundary_longitudes is not None: + lons.bounds = "lon_bnds" + lon_bnds = netcdf.createVariable('lon_bnds', 'f', lon_dim + ('nv',), zlib=True) + lon_bnds[:] = boundary_longitudes + + if rotated: + # Rotated Latitude + rlat = netcdf.createVariable('rlat', 'f', ('rlat',), zlib=True) + rlat.long_name = "latitude in rotated pole grid" + rlat.units = Unit("degrees").symbol + rlat.standard_name = "grid_latitude" + rlat[:] = rotated_lats + + # Rotated Longitude + rlon = netcdf.createVariable('rlon', 'f', ('rlon',), zlib=True) + rlon.long_name = "longitude in rotated pole grid" + rlon.units = Unit("degrees").symbol + rlon.standard_name = "grid_longitude" + rlon[:] = rotated_lons + if lcc or mercator: + x = netcdf.createVariable('x', 'd', ('x',), zlib=True) + x.units = Unit("km").symbol + x.long_name = "x coordinate of projection" + x.standard_name = "projection_x_coordinate" + x[:] = lcc_x + + y = netcdf.createVariable('y', 'd', ('y',), zlib=True) + y.units = Unit("km").symbol + y.long_name = "y coordinate of projection" + y.standard_name = "projection_y_coordinate" + y[:] = lcc_y + + cell_area_dim = var_dim + # Levels + if levels is not None: + var_dim = ('lev',) + var_dim + lev = netcdf.createVariable('lev', 'f', ('lev',), zlib=True) + lev.units = Unit("m").symbol + lev.positive = 'up' + lev[:] = levels + + # All variables + if len(data_list) is 0: + var = netcdf.createVariable('aux_var', 'f', ('time',) + var_dim, zlib=True) + var[:] = 0 + for variable in data_list: + # print ('time',) + var_dim + var = netcdf.createVariable(variable['name'], 'f', ('time',) + var_dim, zlib=True) + var.units = Unit(variable['units']).symbol + if 'long_name' in variable: + var.long_name = str(variable['long_name']) + if 'standard_name' in variable: + var.standard_name = str(variable['standard_name']) + if 'cell_method' in variable: + var.cell_method = str(variable['cell_method']) + var.coordinates = "lat lon" + if cell_area is not None: + var.cell_measures = 'area: cell_area' + if regular_latlon: + var.grid_mapping = 'crs' + elif rotated: + var.grid_mapping = 'rotated_pole' + elif lcc: + var.grid_mapping = 'Lambert_conformal' + elif mercator: + var.grid_mapping = 'mercator' + try: + var[:] = variable['data'] + except ValueError: + print 'VAR ERROR, netcdf shape: {0}, variable shape: 
{1}'.format(var[:].shape, variable['data'].shape) + + # Grid mapping + if regular_latlon: + # CRS + mapping = netcdf.createVariable('crs', 'i') + mapping.grid_mapping_name = "latitude_longitude" + mapping.semi_major_axis = 6371000.0 + mapping.inverse_flattening = 0 + elif rotated: + # Rotated pole + mapping = netcdf.createVariable('rotated_pole', 'c') + mapping.grid_mapping_name = 'rotated_latitude_longitude' + mapping.grid_north_pole_latitude = 90 - north_pole_lat + mapping.grid_north_pole_longitude = north_pole_lon + elif lcc: + # CRS + mapping = netcdf.createVariable('Lambert_conformal', 'i') + mapping.grid_mapping_name = "lambert_conformal_conic" + mapping.standard_parallel = lat_1_2 + mapping.longitude_of_central_meridian = lon_0 + mapping.latitude_of_projection_origin = lat_0 + elif mercator: + # Mercator + mapping = netcdf.createVariable('mercator', 'i') + mapping.grid_mapping_name = "mercator" + mapping.longitude_of_projection_origin = lon_0 + mapping.standard_parallel = lat_ts + + # Cell area + if cell_area is not None: + c_area = netcdf.createVariable('cell_area', 'f', cell_area_dim) + c_area.long_name = "area of the grid cell" + c_area.standard_name = "cell_area" + c_area.units = Unit("m2").symbol + # print c_area[:].shape, cell_area.shape + c_area[:] = cell_area + + if global_attributes is not None: + netcdf.setncatts(global_attributes) + + netcdf.close() diff --git a/hermesv3_bu/io_server/io_raster.py b/hermesv3_bu/io_server/io_raster.py new file mode 100755 index 0000000000000000000000000000000000000000..73dc937a6b1b199c87ef2679efda14f8443708c9 --- /dev/null +++ b/hermesv3_bu/io_server/io_raster.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python + +import sys +import os +import timeit +from warnings import warn +from mpi4py import MPI +import rasterio +from rasterio.mask import mask +import geopandas as gpd +import pandas as pd +import numpy as np +from shapely.geometry import Polygon + + +from hermesv3_bu.io_server.io_server import IoServer + + +class IoRaster(IoServer): + def __init__(self, comm=None): + if comm is None: + comm = MPI.COMM_WORLD + super(IoRaster, self).__init__(comm) + + def clip_raster_with_shapefile(self, raster_path, shape_path, clipped_raster_path, values=None, nodata=0): + """ + Clip a raster using given shapefile path. + + The clip is performed only by the selected rank process. + + :param raster_path: Path to the raster to clip. + :type raster_path: str + + :param shape_path: Path to the shapefile with the polygons where clip the input raster. + :type shape_path: str + + :param clipped_raster_path: Place to store the clipped raster. + :type clipped_raster_path: str + + :param values: List of data values to clip. + :type values: list + + :return: Path where is stored the clipped raster. 
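As a reference for the writer defined above, a minimal sketch of a call for a small regular lat-lon grid; the import path, output path and placeholder variable are assumptions for illustration, not part of the diff:

    import numpy as np
    from hermesv3_bu.io_server.io_netcdf import write_coords_netcdf

    center_lats = np.arange(41.05, 42.05, 0.1)  # 1D cell-centre latitudes
    center_lons = np.arange(1.05, 2.05, 0.1)    # 1D cell-centre longitudes
    write_coords_netcdf('/path/to/aux/temporal_coords.nc', center_lats, center_lons,
                        [{'name': 'var_aux', 'units': 'm', 'data': 0}],  # placeholder variable
                        regular_latlon=True)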
+ :rtype: str + """ + def getFeatures(gdf): + """ + https://automating-gis-processes.github.io/CSC18/lessons/L6/clipping-raster.html + Function to parse features from GeoDataFrame in such a manner that rasterio wants them""" + import json + return [json.loads(gdf.to_json())['features'][0]['geometry']] + + data = rasterio.open(raster_path) + geo = gpd.read_file(shape_path) + if len(geo) > 1: + geo = gpd.GeoDataFrame(geometry=[geo.geometry.unary_union], crs=geo.crs) + geo = geo.to_crs(crs=data.crs.data) + coords = getFeatures(geo) + + out_img, out_transform = mask(data, shapes=coords, crop=True, all_touched=True, nodata=nodata) + if values is not None: + out_img[~np.isin(out_img, values)] = nodata + + out_meta = data.meta.copy() + + out_meta.update({ + "driver": "GTiff", + "height": out_img.shape[1], + "width": out_img.shape[2], + "transform": out_transform, + "crs": data.crs}) + if not os.path.exists(os.path.dirname(clipped_raster_path)): + os.makedirs(os.path.dirname(clipped_raster_path)) + dst = rasterio.open(clipped_raster_path, "w", **out_meta) + dst.write(out_img) + + return clipped_raster_path + + def clip_raster_with_shapefile_poly(self, raster_path, geo, clipped_raster_path, values=None, nodata=0): + """ + Clip a raster using given shapefile. + + The clip is performed only by the master (rank 0) process. + + :param raster_path: Path to the raster to clip. + :type raster_path: str + + :param geo: Shapefile with the polygons where clip the input raster. + :type geo: GeoDataFrame + + :param clipped_raster_path: Place to store the clipped raster. + :type clipped_raster_path: str + + :param values: List of data values to clip. + :type values: list + + :param nodata: Value for the no data elements. Default 0 + :type nodata: float + + :return: Path where is stored the clipped raster. + :rtype: str + """ + def get_features(gdf): + """ + https://automating-gis-processes.github.io/CSC18/lessons/L6/clipping-raster.html + Function to parse features from GeoDataFrame in such a manner that rasterio wants them""" + import json + return [json.loads(gdf.to_json())['features'][0]['geometry']] + + data = rasterio.open(raster_path) + + if len(geo) > 1: + geo = gpd.GeoDataFrame(geometry=[geo.geometry.unary_union], crs=geo.crs) + geo = geo.to_crs(crs=data.crs.data) + coords = get_features(geo) + + out_img, out_transform = mask(data, shapes=coords, crop=True, all_touched=True, nodata=nodata) + if values is not None: + out_img[~np.isin(out_img, values)] = nodata + out_meta = data.meta.copy() + + out_meta.update( + { + "driver": "GTiff", + "height": out_img.shape[1], + "width": out_img.shape[2], + "transform": out_transform, + "crs": data.crs + }) + if not os.path.exists(os.path.dirname(clipped_raster_path)): + os.makedirs(os.path.dirname(clipped_raster_path)) + dst = rasterio.open(clipped_raster_path, "w", **out_meta) + dst.write(out_img) + + return clipped_raster_path + + def create_bounds(self, coordinates, inc, number_vertices=2, inverse=False): + """ + Calculate the vertices coordinates. + + :param coordinates: Coordinates in degrees (latitude or longitude) + :type coordinates: numpy.array + + :param inc: Increment between center values. + :type inc: float + + :param number_vertices: Non mandatory parameter that informs the number of vertices that must have the + boundaries (by default 2). + :type number_vertices: int + + :param inverse: For some grid latitudes. + :type inverse: bool + + :return: Array with as many elements as vertices for each value of coords. 
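A short usage sketch for the clipping helper above; the raster, shapefile and output paths are placeholders:

    from hermesv3_bu.io_server.io_raster import IoRaster

    io_raster = IoRaster()  # defaults to MPI.COMM_WORLD
    clipped_path = io_raster.clip_raster_with_shapefile(
        '/path/to/population_density.tif',   # raster to clip
        '/path/to/region_polygons.shp',      # clipping polygons (their unary union is used if several)
        '/path/to/aux/population_clip.tif',  # where the clipped raster is written
        nodata=0)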
+ :rtype: numpy.array + """ + spent_time = timeit.default_timer() + # Create new arrays moving the centers half increment less and more. + coords_left = coordinates - inc / 2 + coords_right = coordinates + inc / 2 + + # Defining the number of corners needed. 2 to regular grids and 4 for irregular ones. + if number_vertices == 2: + # Create an array of N arrays of 2 elements to store the floor and the ceil values for each cell + bound_coords = np.dstack((coords_left, coords_right)) + bound_coords = bound_coords.reshape((len(coordinates), number_vertices)) + elif number_vertices == 4: + # Create an array of N arrays of 4 elements to store the corner values for each cell + # It can be stored in clockwise starting form the left-top element, or in inverse mode. + if inverse: + bound_coords = np.dstack((coords_left, coords_left, coords_right, coords_right)) + else: + bound_coords = np.dstack((coords_left, coords_right, coords_right, coords_left)) + else: + raise ValueError('ERROR: The number of vertices of the boundaries must be 2 or 4.') + # self.logger.write_time_log('IoRaster', 'create_bounds', timeit.default_timer() - spent_time, 3) + return bound_coords + + def to_shapefile(self, raster_path, out_path=None, write=False, crs=None, rank=0, nodata=0): + """ + + :param raster_path: + :param out_path: + :param write: + :param crs: + :param rank: + :param nodata: + :return: + """ + + if self.comm.Get_rank() == rank: + gdf = self.to_shapefile_serie(raster_path, out_path=out_path, write=write, crs=crs, nodata=nodata) + else: + gdf = None + + if self.comm.Get_size() > 1: + gdf = self.comm.bcast(gdf, root=0) + + return gdf + + def to_shapefile_serie(self, raster_path, out_path=None, write=False, crs=None, nodata=0): + """ + + :param raster_path: + :param out_path: + :param write: + :param crs: + :param nodata: + :return: + """ + + if out_path is None or not os.path.exists(out_path): + import rasterio + from rasterio.features import shapes + mask = None + src = rasterio.open(raster_path) + image = src.read(1) # first band + image = image.astype(np.float32) + geoms = ( + {'properties': {'data': v}, 'geometry': s} + for i, (s, v) in enumerate(shapes(image, mask=mask, transform=src.transform))) + + gdf = gpd.GeoDataFrame.from_features(geoms) + + gdf.loc[:, 'CELL_ID'] = xrange(len(gdf)) + gdf = gdf[gdf['data'] != nodata] + + gdf.crs = src.crs + + if crs is not None: + gdf = gdf.to_crs(crs) + + if write: + if not os.path.exists(os.path.dirname(out_path)): + os.makedirs(os.path.dirname(out_path)) + gdf.to_file(out_path) + + else: + gdf = gpd.read_file(out_path) + + return gdf diff --git a/hermesv3_bu/io_server/io_server.py b/hermesv3_bu/io_server/io_server.py new file mode 100755 index 0000000000000000000000000000000000000000..694798fd68072c67125102568fc2bfdf625474b2 --- /dev/null +++ b/hermesv3_bu/io_server/io_server.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python + + +class IoServer(object): + def __init__(self, comm): + self.comm = comm diff --git a/hermesv3_bu/io_server/io_shapefile.py b/hermesv3_bu/io_server/io_shapefile.py new file mode 100755 index 0000000000000000000000000000000000000000..f2c89d6b42010f53a186f07e011530e2d7483c3a --- /dev/null +++ b/hermesv3_bu/io_server/io_shapefile.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python + +import sys +import os +from timeit import default_timer as gettime +from warnings import warn +import numpy as np +import pandas as pd +import geopandas as gpd +from mpi4py import MPI + +from hermesv3_bu.io_server.io_server import IoServer + + +class IoShapefile(IoServer): + def 
__init__(self, comm=None): + if comm is None: + comm = MPI.COMM_WORLD + + super(IoShapefile, self).__init__(comm) + + def write_shapefile_serial(self, data, path): + """ + + :param data: GeoDataset to be written + :type data: GeoDataFrame + + :param path: + + :return: True when the writing is finished. + :rtype: bool + """ + if not os.path.exists(os.path.dirname(path)): + os.makedirs(os.path.dirname(path)) + data.to_file(path) + + return True + + def write_shapefile_parallel(self, data, path, rank=0): + """ + + :param data: GeoDataset to be written + :type data: GeoDataFrame + + :param path: + + :return: True when the writing is finished. + :rtype: bool + """ + data = self.comm.gather(data, root=rank) + if self.comm.Get_rank() == rank: + if not os.path.exists(os.path.dirname(path)): + os.makedirs(os.path.dirname(path)) + data = pd.concat(data) + data.to_file(path) + + self.comm.Barrier() + + return True + + def read_shapefile_serial(self, path): + + gdf = gpd.read_file(path) + + return gdf + + def read_shapefile(self, path, rank=0): + if self.comm.Get_rank() == rank: + gdf = gpd.read_file(path) + gdf = np.array_split(gdf, self.comm.Get_size()) + else: + gdf = None + + gdf = self.comm.scatter(gdf, root=rank) + + return gdf + + def read_shapefile_parallel(self, path, rank=0): + if self.comm.Get_rank() == rank: + data = self.read_shapefile_serial(path) + else: + data = None + + data = self.split_shapefile(data, rank) + + return data + + def split_shapefile(self, data, rank=0): + + if self.comm.Get_size() == 1: + data = data + else: + if self.comm.Get_rank() == rank: + data = np.array_split(data, self.comm.Get_size()) + else: + data = None + data = self.comm.scatter(data, root=rank) + + return data + + def balance(self, data, rank=0): + + data = self.comm.gather(data, root=rank) + if self.comm.Get_rank() == rank: + data = pd.concat(data) + data = np.array_split(data, self.comm.Get_size()) + else: + data = None + + data = self.comm.scatter(data, root=rank) + + return data diff --git a/hermesv3_bu/modules/point_source/__init__.py b/hermesv3_bu/logger/__init__.py similarity index 100% rename from hermesv3_bu/modules/point_source/__init__.py rename to hermesv3_bu/logger/__init__.py diff --git a/hermesv3_bu/logger/log.py b/hermesv3_bu/logger/log.py new file mode 100644 index 0000000000000000000000000000000000000000..5c1b3caf389744f52d9bda560a987be495131bc3 --- /dev/null +++ b/hermesv3_bu/logger/log.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python + +import os +import numpy as np +import pandas as pd + + +class Log(object): + def __init__(self, comm, arguments, log_refresh=1, time_log_refresh=0): + """ + Initialise the Log class. + + :param comm: MPI communicator + + :param arguments: Complete argument NameSpace. 
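A minimal sketch of the parallel shapefile round trip provided by the class above; paths are placeholders and every rank executes the same lines:

    from hermesv3_bu.io_server.io_shapefile import IoShapefile

    io_shp = IoShapefile()  # MPI.COMM_WORLD by default
    # Rank 0 reads the file and scatters one chunk of rows to each rank.
    gdf = io_shp.read_shapefile_parallel('/path/to/grid_shapefile.shp')
    # ... per-rank processing of 'gdf' ...
    gdf = io_shp.balance(gdf)  # gather, concatenate and re-scatter to even out the workload
    # Chunks are gathered on rank 0 and written as a single shapefile.
    io_shp.write_shapefile_parallel(gdf, '/path/to/output/processed.shp')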
+ :type arguments: NameSpace + + :param log_refresh: + :param time_log_refresh: + """ + self.comm = comm + + self.refresh_rate = (log_refresh, time_log_refresh) + self.log_refresh = self.refresh_rate[0] + self.time_log_refresh = self.refresh_rate[1] + + self.log_level = arguments.log_level + self.log_path = os.path.join(arguments.output_dir, 'logs', 'Log_r{0:04d}_p{1:04d}_{2}.log'.format( + comm.Get_rank(), comm.Get_size(), os.path.basename(arguments.output_name).replace('.nc', ''))) + self.time_log_path = os.path.join(arguments.output_dir, 'logs', 'Times_p{0:04d}_{1}.csv'.format( + comm.Get_size(), os.path.basename(arguments.output_name).replace('.nc', ''))) + + if comm.Get_rank() == 0: + if not os.path.exists(os.path.dirname(self.log_path)): + os.makedirs(os.path.dirname(self.log_path)) + else: + if os.path.exists(self.time_log_path): + os.remove(self.time_log_path) + self.time_log = open(self.time_log_path, mode='w') + else: + # Time log only writed by master process + self.time_log = None + comm.Barrier() + + if os.path.exists(self.log_path): + os.remove(self.log_path) + + self.log = open(self.log_path, mode='w') + + self.df_times = pd.DataFrame(columns=['Class', 'Function', comm.Get_rank()]) + + def write_log(self, message, message_level=1): + """ + Write the log message. + + The log will be refresh every log_refresh value messages. + + :param message: Message to write. + :type message: str + + :param message_level: Importance of the message. From 1 (bottom) to 3 (top). Default 1 + :type message_level: int + + :return: True if everything is ok. + :rtype: bool + """ + if message_level <= self.log_level: + self.log.write("{0}\n".format(message)) + + if self.log_refresh > 0: + self.log_refresh -= 1 + if self.log_refresh == 0: + self.log.flush() + self.log_refresh = self.refresh_rate[0] + return True + + def _write_csv_times_log_file(self, rank=0): + """ + Write the times log CSV file. + + :param rank: Process to write. + :type rank: int + + :return: True if everything is ok. + :rtype: bool + """ + self.df_times = self.df_times.groupby(['Class', 'Function']).sum().reset_index() + data_frames = self.comm.gather(self.df_times, root=0) + if self.comm.Get_rank() == rank: + df_merged = reduce(lambda left, right: pd.merge(left, right, on=['Class', 'Function'], how='outer'), + data_frames) + df_merged = df_merged.groupby(['Class', 'Function']).sum() + df_merged['min'] = df_merged.loc[:, range(self.comm.Get_size())].min(axis=1) + df_merged['max'] = df_merged.loc[:, range(self.comm.Get_size())].max(axis=1) + df_merged['mean'] = df_merged.loc[:, range(self.comm.Get_size())].mean(axis=1) + + df_merged = df_merged.replace(0.0, np.NaN) + df_merged.to_csv(self.time_log_path) + + self.comm.Barrier() + return True + + def write_time_log(self, class_name, function_name, spent_time, message_level=1): + """ + Add times to be written. Master process will write that log every times_log_refresh received messages. + + :param class_name: Name of the class. + :type class_name: str + + :param function_name: Name of the function. + :type function_name: str + + :param spent_time: Time spent in the function. + :type spent_time: float + + :param message_level: Importance of the message. From 1 (bottom) to 3 (top). Default 1 + :type message_level: int + + :return: True if everything is ok. 
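Typical use of the two logging calls above, assuming 'logger' is the Log instance built from the run arguments; the class and function names passed to write_time_log are illustrative:

    from timeit import default_timer

    start = default_timer()
    logger.write_log('Clipping the input raster', message_level=2)
    # ... do the work to be timed ...
    logger.write_time_log('IoRaster', 'clip_raster_with_shapefile', default_timer() - start)
    # At the end of the run, finish_logs() flushes the log and rank 0 writes the aggregated timing CSV.
    logger.finish_logs()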
+ :rtype: bool + """ + if message_level <= self.log_level: + self.df_times = self.df_times.append( + {'Class': class_name, 'Function': function_name, self.comm.Get_rank(): spent_time}, ignore_index=True) + # if self.time_log_refresh > 0: + # self.time_log_refresh -= 1 + # if self.time_log_refresh == 0: + # + # self._write_csv_times_log_file() + # self.time_log_refresh = self.refresh_rate[0] + return True + + def finish_logs(self): + """ + Finalize the log files. + + :return: + """ + self._write_csv_times_log_file() + self.log.flush() + self.log.close() diff --git a/hermesv3_bu/modules/grids/grid.py b/hermesv3_bu/modules/grids/grid.py deleted file mode 100644 index 0c424d1ecd90aa27f068164d514c93216535a649..0000000000000000000000000000000000000000 --- a/hermesv3_bu/modules/grids/grid.py +++ /dev/null @@ -1,549 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Earth Sciences Department, BSC-CNS -# -# This file is part of HERMESv3_GR. -# -# HERMESv3_GR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# HERMESv3_GR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with HERMESv3_GR. If not, see . - - -import os -import sys -import timeit -import numpy as np -import ESMF -import hermesv3_gr.config.settings as settings - - -class Grid(object): - """ - Grid object that contains the information of the output grid. - - :param grid_type: Type of the output grid [global, rotated, lcc, mercator]. - :type grid_type: str - - :param vertical_description_path: Path to the file that contains the vertical description. - :type vertical_description_path: str - - :param temporal_path: Path to the temporal folder. 
- :type temporal_path: str - """ - - def __init__(self, grid_type, vertical_description_path, temporal_path): - st_time = timeit.default_timer() - # settings.write_log('Creating Grid...', level=1) - - # Defining class atributes - self.procs_array = None - self.nrows = 0 - self.ncols = 0 - - self.grid_type = grid_type - self.vertical_description = self.set_vertical_levels(vertical_description_path) - self.center_latitudes = None - self.center_longitudes = None - self.boundary_latitudes = None - self.boundary_longitudes = None - - self.cell_area = None - if settings.rank == 0: - if not os.path.exists(os.path.join(temporal_path)): - os.makedirs(os.path.join(temporal_path)) - settings.comm.Barrier() - - self.coords_netcdf_file = os.path.join(temporal_path, 'temporal_coords.nc') - self.temporal_path = temporal_path - self.shapefile_path = None - - self.esmf_grid = None - self.x_lower_bound = None - self.x_upper_bound = None - self.y_lower_bound = None - self.y_upper_bound = None - self.shape = None - - self.crs = None - - settings.write_time('Grid', 'Init', timeit.default_timer() - st_time, level=1) - - @staticmethod - def create_esmf_grid_from_file(file_name, sphere=True): - import ESMF - - st_time = timeit.default_timer() - settings.write_log('\t\tCreating ESMF grid from file {0}'.format(file_name), level=3) - - # ESMF.Manager(debug=True) - - grid = ESMF.Grid(filename=file_name, filetype=ESMF.FileFormat.GRIDSPEC, is_sphere=sphere, - add_corner_stagger=True) - - settings.write_time('Grid', 'create_esmf_grid_from_file', timeit.default_timer() - st_time, level=3) - return grid - - @staticmethod - def select_grid(grid_type, vertical_description_path, timestep_num, temporal_path, inc_lat, inc_lon, - centre_lat, centre_lon, west_boundary, south_boundary, inc_rlat, inc_rlon, - lat_1, lat_2, lon_0, lat_0, nx, ny, inc_x, inc_y, x_0, y_0, lat_ts): - # TODO describe better the rotated parameters - """ - Create a Grid object depending on the grid type. - - :param grid_type: type of grid to create [global, rotated, lcc, mercator] - :type grid_type: str - - :param vertical_description_path: Path to the file that contains the vertical description. - :type vertical_description_path: str - - :param timestep_num: Number of timesteps. - :type timestep_num: int - - :param temporal_path: Path to the temporal folder. - :type temporal_path: str - - :param inc_lat: [global] Increment between latitude centroids (degrees). - :type inc_lat: float - - :param inc_lon: [global] Increment between longitude centroids (degrees). - :type inc_lon: float - - :param centre_lat: [rotated] - :type centre_lat: float - - :param centre_lon: [rotated] - :type centre_lon: float - - :param west_boundary: [rotated] - :type west_boundary: float - - :param south_boundary: [rotated] - :type south_boundary: float - - :param inc_rlat: [rotated] Increment between rotated latitude centroids (degrees). - :type inc_rlat: float - - :param inc_rlon: [rotated] Increment between rotated longitude centroids (degrees). - :type inc_rlon: float - - :param lat_ts: [mercator] - :type lat_ts: float - - :param lat_1: [lcc] Value of the Lat1 for the LCC grid type. - :type lat_1: float - - :param lat_2: [lcc] Value of the Lat2 for the LCC grid type. - :type lat_2: float - - :param lon_0: [lcc, mercator] Value of the Lon0 for the LCC grid type. - :type lon_0: float - - :param lat_0: [lcc] Value of the Lat0 for the LCC grid type. - :type lat_0: float - - :param nx: [lcc, mercator] Number of cells on the x dimension. 
- :type nx: int - - :param ny: [lcc, mercator] Number of cells on the y dimension. - :type ny: int - - :param inc_x: [lcc, mercator] Increment between x dimensions cell centroids (metres). - :type inc_x: int - - :param inc_y: [lcc, mercator] Increment between y dimensions cell centroids (metres). - :type inc_y: int - - :param x_0: [lcc, mercator] Value of the X0 for the LCC grid type. - :type x_0: float - - :param y_0: [lcc, mercator] Value of the Y0 for the LCC grid type. - :type y_0: float - - :return: Grid object. It will return a GlobalGrid, RotatedGrid or LccGrid depending on the type. - :rtype: Grid - """ - - st_time = timeit.default_timer() - settings.write_log('Selecting grid', level=1) - - # Creating a different object depending on the grid type - if grid_type == 'global': - from hermesv3_gr.modules.grids.grid_global import GlobalGrid - grid = GlobalGrid(grid_type, vertical_description_path, timestep_num, temporal_path, inc_lat, inc_lon) - - elif grid_type == 'rotated': - from hermesv3_gr.modules.grids.grid_rotated import RotatedGrid - grid = RotatedGrid(grid_type, vertical_description_path, timestep_num, temporal_path, - centre_lat, centre_lon, west_boundary, south_boundary, inc_rlat, inc_rlon) - - elif grid_type == 'lcc': - from hermesv3_gr.modules.grids.grid_lcc import LccGrid - grid = LccGrid(grid_type, vertical_description_path, timestep_num, temporal_path, lat_1, lat_2, lon_0, - lat_0, nx, ny, inc_x, inc_y, x_0, y_0) - - elif grid_type == 'mercator': - from hermesv3_gr.modules.grids.grid_mercator import MercatorGrid - grid = MercatorGrid(grid_type, vertical_description_path, timestep_num, temporal_path, lat_ts, lon_0, - nx, ny, inc_x, inc_y, x_0, y_0) - else: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise NotImplementedError("The grid type {0} is not implemented.".format(grid_type) - + " Use 'global', 'rotated' or 'lcc'.") - sys.exit(1) - - settings.write_time('Grid', 'select_grid', timeit.default_timer() - st_time, level=3) - - return grid - - @staticmethod - def set_vertical_levels(vertical_description_path): - """ - Extract the vertical levels. - - :param vertical_description_path: path to the file that contain the vertical description of the required output - file. - :type vertical_description_path: str - - :return: Vertical levels. - :rtype: list of int - """ - import pandas as pd - - st_time = timeit.default_timer() - settings.write_log('\t\tSetting vertical levels', level=3) - - df = pd.read_csv(vertical_description_path, sep=';') - - heights = df.height_magl.values - - settings.write_time('Grid', 'set_vertical_levels', timeit.default_timer() - st_time, level=3) - - return heights - - def write_coords_netcdf(self): - """ - Writes the temporal file with the coordinates of the output needed to generate the weight matrix. - If it is already well created it will only add the cell_area parameter. - """ - # TODO Not to write two NetCDF. Open one and modify it. - from hermesv3_gr.tools.netcdf_tools import write_netcdf - - st_time = timeit.default_timer() - settings.write_log('\twrite_coords_netcdf', level=3) - - if not self.chech_coords_file(): - # Writes an auxiliary empty NetCDF only with the coordinates and an empty variable. 
- write_netcdf(self.coords_netcdf_file, self.center_latitudes, self.center_longitudes, - [{'name': 'var_aux', 'units': '', 'data': 0}], - boundary_latitudes=self.boundary_latitudes, boundary_longitudes=self.boundary_longitudes, - regular_latlon=True) - - # Calculate the cell area of the auxiliary NetCDF file - self.cell_area = self.get_cell_area() - - # Re-writes the NetCDF adding the cell area - write_netcdf(self.coords_netcdf_file, self.center_latitudes, self.center_longitudes, - [{'name': 'var_aux', 'units': '', 'data': 0}], - cell_area=self.cell_area, boundary_latitudes=self.boundary_latitudes, - boundary_longitudes=self.boundary_longitudes, regular_latlon=True) - else: - self.cell_area = self.get_cell_area() - - settings.write_time('Grid', 'write_coords_netcdf', timeit.default_timer() - st_time, level=3) - - def get_cell_area(self): - """ - Calculate the cell area of the grid. - - :return: Area of each cell of the grid. - :rtype: numpy.array - """ - from cdo import Cdo - from netCDF4 import Dataset - - st_time = timeit.default_timer() - settings.write_log('\t\tGetting cell area from {0}'.format(self.coords_netcdf_file), level=3) - - # Initialises the CDO - cdo = Cdo() - # Create a temporal file 's' with the cell area - s = cdo.gridarea(input=self.coords_netcdf_file) - # Get the cell area of the temporal file - nc_aux = Dataset(s, mode='r') - cell_area = nc_aux.variables['cell_area'][:] - nc_aux.close() - - settings.write_time('Grid', 'get_cell_area', timeit.default_timer() - st_time, level=3) - - return cell_area - - @staticmethod - def create_regular_grid_1d_array(center, inc, boundary): - """ - Create a regular grid giving the center, boundary and increment. - - :param center: Center of the coordinates. - :type center: float - - :param inc: Resolution: Increment between cells. - :type inc: float - - :param boundary: Limit of the coordinates: Distance between the first cell and the center. - :type boundary: float - - :return: 1D array with the coordinates. - :rtype: numpy.array - """ - - st_time = timeit.default_timer() - - # Calculate first center point. - origin = center - abs(boundary) - # Calculate the quantity of cells. - n = (abs(boundary) / inc) * 2 - # Calculate all the values - values = np.arange(origin + inc, origin + (n * inc) - inc + inc / 2, inc, dtype=np.float) - - settings.write_time('Grid', 'create_regular_grid_1d_array', timeit.default_timer() - st_time, level=3) - - return values - - @staticmethod - def create_bounds(coords, inc, number_vertices=2, inverse=False): - """ - Calculate the vertices coordinates. - - :param coords: Coordinates in degrees (latitude or longitude) - :type coords: numpy.array - - :param inc: Increment between center values. - :type inc: float - - :param number_vertices: Non mandatory parameter that informs the number of vertices that must have the - boundaries (by default 2). - :type number_vertices: int - - :param inverse: For some grid latitudes. - :type inverse: bool - - :return: Array with as many elements as vertices for each value of coords. - :rtype: numpy.array - """ - st_time = timeit.default_timer() - settings.write_log('\t\t\tCreating boundaries.', level=3) - - # Create new arrays moving the centers half increment less and more. - coords_left = coords - inc / 2 - coords_right = coords + inc / 2 - - # Defining the number of corners needed. 2 to regular grids and 4 for irregular ones. 
- if number_vertices == 2: - # Create an array of N arrays of 2 elements to store the floor and the ceil values for each cell - bound_coords = np.dstack((coords_left, coords_right)) - bound_coords = bound_coords.reshape((len(coords), number_vertices)) - elif number_vertices == 4: - # Create an array of N arrays of 4 elements to store the corner values for each cell - # It can be stored in clockwise starting form the left-top element, or in inverse mode. - if inverse: - bound_coords = np.dstack((coords_left, coords_left, coords_right, coords_right)) - - else: - bound_coords = np.dstack((coords_left, coords_right, coords_right, coords_left)) - else: - if settings.rank == 0: - raise ValueError('ERROR: The number of vertices of the boundaries must be 2 or 4.') - settings.write_log('ERROR: Check the .err file to get more info.') - sys.exit(1) - - settings.write_time('Grid', 'create_bounds', timeit.default_timer() - st_time, level=3) - - return bound_coords - - def get_coordinates_2d(self): - """ - Returns the coordinates but in a 2D format. - - A regular grid only needs two 1D arrays (latitudes and longitudes) to define a grid. - This method is to convert this two 1D arrays into 2D arrays replicating the info of each value. - - :return: Tuple with 2 fields, the first the 2D latitude coordinate, and the second for the 2D longitude - coordinate. - :rtype: tuple - """ - st_time = timeit.default_timer() - settings.write_log('\t\tGetting 2D coordinates from ESMPy Grid', level=3) - - lat = self.esmf_grid.get_coords(1, ESMF.StaggerLoc.CENTER).T - lon = self.esmf_grid.get_coords(0, ESMF.StaggerLoc.CENTER).T - - settings.write_time('Grid', 'get_coordinates_2d', timeit.default_timer() - st_time, level=3) - - return lat, lon - - def is_shapefile(self): - return os.path.exists(self.shapefile_path) - - def to_shapefile(self, full_grid=True): - import geopandas as gpd - import pandas as pd - from shapely.geometry import Polygon - - st_time = timeit.default_timer() - # settings.write_log('\t\tGetting grid shapefile', level=3) - - if full_grid: - self.shapefile_path = os.path.join(self.temporal_path, 'shapefile') - else: - self.shapefile_path = os.path.join(self.temporal_path, 'shapefiles_n{0}'.format(settings.size)) - - if settings.rank == 0: - if not os.path.exists(self.shapefile_path): - os.makedirs(self.shapefile_path) - if full_grid: - self.shapefile_path = os.path.join(self.shapefile_path, 'grid_shapefile.shp') - else: - self.shapefile_path = os.path.join(self.shapefile_path, 'grid_shapefile_{0}.shp'.format(settings.rank)) - - done = self.is_shapefile() - - if not done: - settings.write_log('\t\tGrid shapefile not done. 
Lets try to create it.', level=3) - # Create Shapefile - - # Use the meters coordiantes to create the shapefile - - y = self.boundary_latitudes - x = self.boundary_longitudes - # sys.exit() - - if self.grid_type == 'global': - x = x.reshape((x.shape[1], x.shape[2])) - y = y.reshape((y.shape[1], y.shape[2])) - - # x_aux = np.empty((x.shape[0], y.shape[0], 4)) - # x_aux[:, :, 0] = x[:, np.newaxis, 0] - # x_aux[:, :, 1] = x[:, np.newaxis, 1] - # x_aux[:, :, 2] = x[:, np.newaxis, 1] - # x_aux[:, :, 3] = x[:, np.newaxis, 0] - aux_shape = (y.shape[0], x.shape[0], 4) - x_aux = np.empty(aux_shape) - x_aux[:, :, 0] = x[np.newaxis, :, 0] - x_aux[:, :, 1] = x[np.newaxis, :, 1] - x_aux[:, :, 2] = x[np.newaxis, :, 1] - x_aux[:, :, 3] = x[np.newaxis, :, 0] - - x = x_aux - # print x - del x_aux - - # y_aux = np.empty((x.shape[0], y.shape[0], 4)) - # y_aux[:, :, 0] = y[np.newaxis, :, 0] - # y_aux[:, :, 1] = y[np.newaxis, :, 0] - # y_aux[:, :, 2] = y[np.newaxis, :, 1] - # y_aux[:, :, 3] = y[np.newaxis, :, 1] - - y_aux = np.empty(aux_shape) - y_aux[:, :, 0] = y[:, np.newaxis, 0] - y_aux[:, :, 1] = y[:, np.newaxis, 0] - y_aux[:, :, 2] = y[:, np.newaxis, 1] - y_aux[:, :, 3] = y[:, np.newaxis, 1] - - # print y_aux - y = y_aux - del y_aux - - # exit() - - if not full_grid: - y = y[self.x_lower_bound:self.x_upper_bound, self.y_lower_bound:self.y_upper_bound, :] - x = x[self.x_lower_bound:self.x_upper_bound, self.y_lower_bound:self.y_upper_bound, :] - - aux_b_lats = y.reshape((y.shape[0] * y.shape[1], y.shape[2])) - aux_b_lons = x.reshape((x.shape[0] * x.shape[1], x.shape[2])) - - # The regular lat-lon projection has only 2 (laterals) points for each cell instead of 4 (corners) - # if aux_b_lats.shape[1] == 2: - # aux_b = np.empty((aux_b_lats.shape[0], 4)) - # aux_b[:, 0] = aux_b_lats[:, 0] - # aux_b[:, 1] = aux_b_lats[:, 0] - # aux_b[:, 2] = aux_b_lats[:, 1] - # aux_b[:, 3] = aux_b_lats[:, 1] - # aux_b_lats = aux_b - # - # if aux_b_lons.shape[1] == 2: - # aux_b = np.empty((aux_b_lons.shape[0], 4)) - # aux_b[:, 0] = aux_b_lons[:, 0] - # aux_b[:, 1] = aux_b_lons[:, 1] - # aux_b[:, 2] = aux_b_lons[:, 1] - # aux_b[:, 3] = aux_b_lons[:, 0] - # aux_b_lons = aux_b - - # Create one dataframe with 8 columns, 4 points with two coordinates each one - df_lats = pd.DataFrame(aux_b_lats, columns=['b_lat_1', 'b_lat_2', 'b_lat_3', 'b_lat_4']) - df_lons = pd.DataFrame(aux_b_lons, columns=['b_lon_1', 'b_lon_2', 'b_lon_3', 'b_lon_4']) - df = pd.concat([df_lats, df_lons], axis=1) - - # Substituate 8 columns by 4 with the two coordinates - df['p1'] = zip(df.b_lon_1, df.b_lat_1) - del df['b_lat_1'], df['b_lon_1'] - df['p2'] = zip(df.b_lon_2, df.b_lat_2) - del df['b_lat_2'], df['b_lon_2'] - df['p3'] = zip(df.b_lon_3, df.b_lat_3) - del df['b_lat_3'], df['b_lon_3'] - df['p4'] = zip(df.b_lon_4, df.b_lat_4) - del df['b_lat_4'], df['b_lon_4'] - - # Make a list of list of tuples - # [[(point_1.1), (point_1.2), (point_1.3), (point_1.4)], - # [(point_2.1), (point_2.2), (point_2.3), (point_2.4)], ...] 
- list_points = df.as_matrix() - del df['p1'], df['p2'], df['p3'], df['p4'] - - # List of polygons from the list of points - geometry = [Polygon(list(points)) for points in list_points] - # geometry = [] - # for point in list_points: - # print point - # geometry.append(Polygon(list(point))) - # print geometry[0] - # sys.exit() - # print len(geometry), len(df), - - gdf = gpd.GeoDataFrame(df, crs={'init': 'epsg:4326'}, geometry=geometry) - gdf = gdf.to_crs(self.crs) - - gdf['FID'] = gdf.index - - gdf.to_file(self.shapefile_path) - else: - settings.write_log('\t\tGrid shapefile already done. Lets try to read it.', level=3) - gdf = gpd.read_file(self.shapefile_path) - - settings.write_time('Grid', 'to_shapefile', timeit.default_timer() - st_time, level=1) - - return gdf - - def chech_coords_file(self): - """ - Checks if the auxiliary coordinates file is created well. - - :return: True: if it is well created. - :rtype: bool - """ - # TODO better check by partition size - return os.path.exists(self.coords_netcdf_file) - - -if __name__ == '__main__': - pass diff --git a/hermesv3_bu/modules/grids/grid_global.py b/hermesv3_bu/modules/grids/grid_global.py deleted file mode 100644 index ebbd97ec7df515ef37bbaf9f2b04b032d99011ae..0000000000000000000000000000000000000000 --- a/hermesv3_bu/modules/grids/grid_global.py +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Earth Sciences Department, BSC-CNS -# -# This file is part of HERMESv3_GR. -# -# HERMESv3_GR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# HERMESv3_GR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with HERMESv3_GR. If not, see . - - -import os -import sys -import timeit - -import hermesv3_gr.config.settings as settings -from grid import Grid - - -class GlobalGrid(Grid): - """ - Global grid object that contains all the information to do a global output. - - :param grid_type: Type of the output grid [global, rotated, lcc, mercator]. - :type grid_type: str - - :param vertical_description_path: Path to the file that contains the vertical description. - :type vertical_description_path: str - - :param timestep_num: Number of timesteps. - :type timestep_num: int - - :param temporal_path: Path to the temporal folder. - :type temporal_path: str - - :param inc_lat: Increment between latitude centroids. - :type inc_lat: float - - :param inc_lon: Increment between longitude centroids. - :type inc_lon: float - - :param center_longitude: Location of the longitude of the center cell. 
- Default = 0 - :type center_longitude: float - """ - - def __init__(self, grid_type, vertical_description_path, timestep_num, temporal_path, inc_lat, inc_lon, - center_longitude=float(0)): - import ESMF - - st_time = timeit.default_timer() - settings.write_log('\tCreating Global grid.', level=2) - - # Initialize the class using parent - super(GlobalGrid, self).__init__(grid_type, vertical_description_path, temporal_path) - - self.center_lat = float(0) - self.center_lon = center_longitude - self.inc_lat = inc_lat - self.inc_lon = inc_lon - - self.crs = {'init': 'epsg:4326'} - self.create_coords() - - if not os.path.exists(self.coords_netcdf_file): - if settings.rank == 0: - super(GlobalGrid, self).write_coords_netcdf() - settings.comm.Barrier() - - self.esmf_grid = super(GlobalGrid, self).create_esmf_grid_from_file(self.coords_netcdf_file) - - self.x_lower_bound = self.esmf_grid.lower_bounds[ESMF.StaggerLoc.CENTER][1] - self.x_upper_bound = self.esmf_grid.upper_bounds[ESMF.StaggerLoc.CENTER][1] - self.y_lower_bound = self.esmf_grid.lower_bounds[ESMF.StaggerLoc.CENTER][0] - self.y_upper_bound = self.esmf_grid.upper_bounds[ESMF.StaggerLoc.CENTER][0] - - self.shape = (timestep_num, len(self.vertical_description), self.x_upper_bound-self.x_lower_bound, - self.y_upper_bound-self.y_lower_bound) - - self.cell_area = self.get_cell_area()[self.x_lower_bound:self.x_upper_bound, - self.y_lower_bound:self.y_upper_bound] - - settings.write_time('GlobalGrid', 'Init', timeit.default_timer() - st_time, level=1) - - def create_coords(self): - """ - Create the coordinates for a global domain. - """ - import numpy as np - - st_time = timeit.default_timer() - settings.write_log('\t\tCreating global coordinates', level=3) - - self.center_latitudes = self.create_regular_grid_1d_array(self.center_lat, self.inc_lat, -90) - self.boundary_latitudes = self.create_bounds(self.center_latitudes, self.inc_lat) - - # ===== Longitudes ===== - self.center_longitudes = self.create_regular_grid_1d_array(self.center_lon, self.inc_lon, -180) - if len(self.center_longitudes)//2 < settings.size: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise AttributeError("ERROR: Maximum number of processors exceeded. 
" + - "It has to be less or equal than {0}.".format(len(self.center_longitudes)//2)) - sys.exit(1) - self.boundary_longitudes = self.create_bounds(self.center_longitudes, self.inc_lon) - - # Creating special cells with half cell on le left and right border - lat_origin = self.center_lat - abs(-90) - lon_origin = self.center_lon - abs(-180) - n_lat = (abs(-90) / self.inc_lat) * 2 - n_lon = (abs(-180) / self.inc_lon) * 2 - self.center_latitudes = np.concatenate([ - [lat_origin + self.inc_lat / 2 - self.inc_lat / 4], self.center_latitudes, - [lat_origin + (n_lat * self.inc_lat) - self.inc_lat / 2 + self.inc_lat / 4]]) - - self.center_longitudes = np.concatenate([ - [lon_origin + self.inc_lon / 2 - self.inc_lon / 4], self.center_longitudes, - [lon_origin + (n_lon * self.inc_lon) - self.inc_lon / 2 + self.inc_lon / 4]]) - - self.boundary_latitudes = np.concatenate([ - [[lat_origin, lat_origin + self.inc_lat / 2]], self.boundary_latitudes, - [[lat_origin + (n_lat * self.inc_lat) - self.inc_lat / 2, lat_origin + (n_lat * self.inc_lat)]]]) - - self.boundary_longitudes = np.concatenate([ - [[lon_origin, lon_origin + self.inc_lon / 2]], self.boundary_longitudes, - [[lon_origin + (n_lon * self.inc_lon) - self.inc_lon / 2, lon_origin + (n_lon * self.inc_lon)]]],) - - self.boundary_latitudes = self.boundary_latitudes.reshape((1,) + self.boundary_latitudes.shape) - self.boundary_longitudes = self.boundary_longitudes.reshape((1,) + self.boundary_longitudes.shape) - - settings.write_time('GlobalGrid', 'create_coords', timeit.default_timer() - st_time, level=2) - - -if __name__ == '__main__': - pass diff --git a/hermesv3_bu/modules/grids/grid_lcc.py b/hermesv3_bu/modules/grids/grid_lcc.py deleted file mode 100644 index 96ea0ec75b35862e4a462aa98438d26eed3ddb51..0000000000000000000000000000000000000000 --- a/hermesv3_bu/modules/grids/grid_lcc.py +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Earth Sciences Department, BSC-CNS -# -# This file is part of HERMESv3_GR. -# -# HERMESv3_GR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# HERMESv3_GR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with HERMESv3_GR. If not, see . - - -import os -import sys -import timeit -import hermesv3_gr.config.settings as settings -from grid import Grid - - -class LccGrid(Grid): - """ - Lambert Conformal Conic (LCC) grid object that contains all the information to do a lcc output. - - :param grid_type: Type of the output grid [global, rotated, lcc, mercator]. - :type grid_type: str - - :param vertical_description_path: Path to the file that contains the vertical description. - :type vertical_description_path: str - - :param timestep_num: Number of timesteps. - :type timestep_num: int - - :param temporal_path: Path to the temporal folder. - :type temporal_path: str - - :param lat_1: Value of the Lat1 for the LCC grid type. - :type lat_1: float - - :param lat_2: Value of the Lat2 for the LCC grid type. - :type lat_2: float - - :param lon_0: Value of the Lon0 for the LCC grid type. 
- :type lon_0: float - - :param lat_0: Value of the Lat0 for the LCC grid type. - :type lat_0: float - - :param nx: Number of cells on the x dimension. - :type nx: int - - :param ny: Number of cells on the y dimension. - :type ny: int - - :param inc_x: Increment between x dimensions cell centroids (metres). - :type inc_x: int - - :param inc_y: Increment between y dimensions cell centroids (metres). - :type inc_y: int - - :param x_0: Value of the X0 for the LCC grid type. - :type x_0: float - - :param y_0: Value of the Y0 for the LCC grid type. - :type y_0: float - - :param earth_radius: Radius of the Earth (metres). - Default = 6370000.000 - :type earth_radius: float - """ - - def __init__(self, grid_type, vertical_description_path, timestep_num, temporal_path, lat_1, lat_2, lon_0, lat_0, - nx, ny, inc_x, inc_y, x_0, y_0, earth_radius=6370000.000): - import ESMF - st_time = timeit.default_timer() - settings.write_log('\tCreating Lambert Conformal Conic (LCC) grid.', level=2) - - # Initialises with parent class - super(LccGrid, self).__init__(grid_type, vertical_description_path, temporal_path) - - # Setting parameters - self.lat_1 = lat_1 - self.lat_2 = lat_2 - self.lon_0 = lon_0 - self.lat_0 = lat_0 - self.nx = nx - self.ny = ny - self.inc_x = inc_x - self.inc_y = inc_y - self.x_0 = x_0 + (inc_x / 2) - self.y_0 = y_0 + (inc_y / 2) - self.earth_radius = earth_radius - - # UTM coordinates - self.x = None - self.y = None - - # Creating coordinates - self.crs = "+proj=lcc +lat_1={0} +lat_2={1} +lat_0={2} +lon_0={3} +x_0={4} +y_0={5} +datum=WGS84".format( - self.lat_1, self.lat_2, self.lat_0, self.lon_0, 0, 0) + " +units=m" - self.create_coords() - - if not os.path.exists(self.coords_netcdf_file): - if settings.rank == 0: - # super(LccGrid, self).write_coords_netcdf() - self.write_coords_netcdf() - settings.comm.Barrier() - - self.esmf_grid = super(LccGrid, self).create_esmf_grid_from_file(self.coords_netcdf_file, sphere=False) - # - self.x_lower_bound = self.esmf_grid.lower_bounds[ESMF.StaggerLoc.CENTER][1] - self.x_upper_bound = self.esmf_grid.upper_bounds[ESMF.StaggerLoc.CENTER][1] - self.y_lower_bound = self.esmf_grid.lower_bounds[ESMF.StaggerLoc.CENTER][0] - self.y_upper_bound = self.esmf_grid.upper_bounds[ESMF.StaggerLoc.CENTER][0] - - self.shape = (timestep_num, len(self.vertical_description), self.x_upper_bound-self.x_lower_bound, - self.y_upper_bound-self.y_lower_bound) - # print 'Rank {0} _3_\n'.format(settings.rank) - settings.comm.Barrier() - # print 'Rank {0} _4_\n'.format(settings.rank) - self.cell_area = self.get_cell_area()[self.x_lower_bound:self.x_upper_bound, - self.y_lower_bound:self.y_upper_bound] - - settings.write_time('LccGrid', 'Init', timeit.default_timer() - st_time, level=1) - - def write_coords_netcdf(self): - """ - Writes the temporal file with the coordinates of the output needed to generate the weight matrix. - If it is already well created it will only add the cell_area parameter. - """ - from hermesv3_gr.tools.netcdf_tools import write_netcdf - - st_time = timeit.default_timer() - settings.write_log('\tWriting {0} file.'.format(self.coords_netcdf_file), level=3) - - if not self.chech_coords_file(): - # Writes an auxiliary empty NetCDF only with the coordinates and an empty variable. 
- write_netcdf(self.coords_netcdf_file, self.center_latitudes, self.center_longitudes, - [{'name': 'var_aux', 'units': '', 'data': 0}], - boundary_latitudes=self.boundary_latitudes, boundary_longitudes=self.boundary_longitudes, - lcc=True, lcc_x=self.x, lcc_y=self.y, - lat_1_2="{0}, {1}".format(self.lat_1, self.lat_2), lon_0=self.lon_0, lat_0=self.lat_0) - - # Calculate the cell area of the auxiliary NetCDF file - self.cell_area = self.get_cell_area() - - # Re-writes the NetCDF adding the cell area - write_netcdf(self.coords_netcdf_file, self.center_latitudes, self.center_longitudes, - [{'name': 'var_aux', 'units': '', 'data': 0}], - boundary_latitudes=self.boundary_latitudes, boundary_longitudes=self.boundary_longitudes, - cell_area=self.cell_area, - lcc=True, lcc_x=self.x, lcc_y=self.y, - lat_1_2="{0}, {1}".format(self.lat_1, self.lat_2), lon_0=self.lon_0, lat_0=self.lat_0) - else: - self.cell_area = self.get_cell_area() - - settings.write_time('LccGrid', 'write_coords_netcdf', timeit.default_timer() - st_time, level=3) - - def create_coords(self): - """ - Create the coordinates for a lambert conformal conic domain. - """ - import numpy as np - from pyproj import Proj - - st_time = timeit.default_timer() - settings.write_log('\t\tCreating lcc coordinates', level=3) - - # Create a regular grid in metres (Two 1D arrays) - self.x = np.arange(self.x_0, self.x_0 + self.inc_x * self.nx, self.inc_x, dtype=np.float) - if len(self.x)//2 < settings.size: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise AttributeError("ERROR: Maximum number of processors exceeded. " + - "It has to be less or equal than {0}.".format(len(self.x)//2)) - sys.exit(1) - self.y = np.arange(self.y_0, self.y_0 + self.inc_y * self.ny, self.inc_y, dtype=np.float) - - # 1D to 2D - x = np.array([self.x] * len(self.y)) - y = np.array([self.y] * len(self.x)).T - - # Create UTM bounds - y_b = super(LccGrid, self).create_bounds(y, self.inc_y, number_vertices=4, inverse=True) - x_b = super(LccGrid, self).create_bounds(x, self.inc_x, number_vertices=4) - - # Create the LCC projection - projection = Proj( - proj='lcc', - ellps='WGS84', - R=self.earth_radius, - lat_1=self.lat_1, - lat_2=self.lat_2, - lon_0=self.lon_0, - lat_0=self.lat_0, - to_meter=1, - x_0=0, - y_0=0, - a=self.earth_radius, - k_0=1.0) - - # UTM to LCC - self.center_longitudes, self.center_latitudes = projection(x, y, inverse=True) - self.boundary_longitudes, self.boundary_latitudes = projection(x_b, y_b, inverse=True) - - settings.write_time('LccGrid', 'create_coords', timeit.default_timer() - st_time, level=2) - - -if __name__ == '__main__': - pass diff --git a/hermesv3_bu/modules/grids/grid_mercator.py b/hermesv3_bu/modules/grids/grid_mercator.py deleted file mode 100644 index f3104fbf6c2ba50452c63219260c457aef06986b..0000000000000000000000000000000000000000 --- a/hermesv3_bu/modules/grids/grid_mercator.py +++ /dev/null @@ -1,193 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Earth Sciences Department, BSC-CNS -# -# This file is part of HERMESv3_GR. -# -# HERMESv3_GR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# HERMESv3_GR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with HERMESv3_GR. If not, see . - - -import os -import sys -import timeit -import hermesv3_gr.config.settings as settings -from grid import Grid - - -class MercatorGrid(Grid): - """ - Mercator grid object that contains all the information to do a mercator output. - - :param grid_type: Type of the output grid [global, rotated, lcc, mercator]. - :type grid_type: str - - :param vertical_description_path: Path to the file that contains the vertical description. - :type vertical_description_path: str - - :param timestep_num: Number of timesteps. - :type timestep_num: int - - :param temporal_path: Path to the temporal folder. - :type temporal_path: str - - :param lon_0: Value of the Lon0 for the LCC grid type. - :type lon_0: float - - :param nx: Number of cells on the x dimension. - :type nx: int - - :param ny: Number of cells on the y dimension. - :type ny: int - - :param inc_x: Increment between x dimensions cell centroids (metres). - :type inc_x: int - - :param inc_y: Increment between y dimensions cell centroids (metres). - :type inc_y: int - - :param x_0: Value of the X0 for the LCC grid type. - :type x_0: float - - :param y_0: Value of the Y0 for the LCC grid type. - :type y_0: float - - :param earth_radius: Radius of the Earth (metres). - Default = 6370000.000 - :type earth_radius: float - """ - - def __init__(self, grid_type, vertical_description_path, timestep_num, temporal_path, lat_ts, lon_0, - nx, ny, inc_x, inc_y, x_0, y_0, earth_radius=6370000.000): - import ESMF - st_time = timeit.default_timer() - settings.write_log('\tCreating Mercator grid.', level=2) - - # Initialises with parent class - super(MercatorGrid, self).__init__(grid_type, vertical_description_path, temporal_path) - - # Setting parameters - self.lat_ts = lat_ts - self.lon_0 = lon_0 - self.nx = nx - self.ny = ny - self.inc_x = inc_x - self.inc_y = inc_y - self.x_0 = x_0 + (inc_x / 2) - self.y_0 = y_0 + (inc_y / 2) - self.earth_radius = earth_radius - - # UTM coordinates - self.x = None - self.y = None - - # Creating coordinates - self.crs = "+proj=merc +a={2} +b={2} +lat_ts={0} +lon_0={1}".format(self.lat_ts, self.lon_0, earth_radius) - - self.create_coords() - - if not os.path.exists(self.coords_netcdf_file): - if settings.rank == 0: - self.write_coords_netcdf() - settings.comm.Barrier() - - self.esmf_grid = super(MercatorGrid, self).create_esmf_grid_from_file(self.coords_netcdf_file, sphere=False) - # - self.x_lower_bound = self.esmf_grid.lower_bounds[ESMF.StaggerLoc.CENTER][1] - self.x_upper_bound = self.esmf_grid.upper_bounds[ESMF.StaggerLoc.CENTER][1] - self.y_lower_bound = self.esmf_grid.lower_bounds[ESMF.StaggerLoc.CENTER][0] - self.y_upper_bound = self.esmf_grid.upper_bounds[ESMF.StaggerLoc.CENTER][0] - - self.shape = (timestep_num, len(self.vertical_description), self.x_upper_bound-self.x_lower_bound, - self.y_upper_bound-self.y_lower_bound) - # print 'Rank {0} _3_\n'.format(settings.rank) - settings.comm.Barrier() - # print 'Rank {0} _4_\n'.format(settings.rank) - self.cell_area = self.get_cell_area()[self.x_lower_bound:self.x_upper_bound, - self.y_lower_bound:self.y_upper_bound] - - settings.write_time('MercatorGrid', 'Init', timeit.default_timer() - st_time, level=1) - - def write_coords_netcdf(self): - """ - Writes the temporal file with the coordinates of the output needed to generate the weight matrix. 
- If it is already well created it will only add the cell_area parameter. - """ - from hermesv3_gr.tools.netcdf_tools import write_netcdf - - st_time = timeit.default_timer() - - if not self.chech_coords_file(): - # Writes an auxiliary empty NetCDF only with the coordinates and an empty variable. - write_netcdf(self.coords_netcdf_file, self.center_latitudes, self.center_longitudes, - [{'name': 'var_aux', 'units': '', 'data': 0}], - boundary_latitudes=self.boundary_latitudes, boundary_longitudes=self.boundary_longitudes, - mercator=True, lcc_x=self.x, lcc_y=self.y, lon_0=self.lon_0, lat_ts=self.lat_ts) - - # Calculate the cell area of the auxiliary NetCDF file - self.cell_area = self.get_cell_area() - - # Re-writes the NetCDF adding the cell area - write_netcdf(self.coords_netcdf_file, self.center_latitudes, self.center_longitudes, - [ - {'name': 'var_aux', - 'units': '', - 'data': 0} - ], - boundary_latitudes=self.boundary_latitudes, boundary_longitudes=self.boundary_longitudes, - cell_area=self.cell_area, - mercator=True, lcc_x=self.x, lcc_y=self.y, lon_0=self.lon_0, lat_ts=self.lat_ts) - else: - self.cell_area = self.get_cell_area() - - settings.write_time('MercatorGrid', 'write_coords_netcdf', timeit.default_timer() - st_time, level=3) - - def create_coords(self): - """ - Create the coordinates for a lambert conformal conic domain. - """ - import numpy as np - from pyproj import Proj - - st_time = timeit.default_timer() - - # Create a regular grid in metres (Two 1D arrays) - self.x = np.arange(self.x_0, self.x_0 + self.inc_x * self.nx, self.inc_x, dtype=np.float) - if len(self.x)//2 < settings.size: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise AttributeError("ERROR: Maximum number of processors exceeded. " + - "It has to be less or equal than {0}.".format(len(self.x)//2)) - sys.exit(1) - self.y = np.arange(self.y_0, self.y_0 + self.inc_y * self.ny, self.inc_y, dtype=np.float) - - # 1D to 2D - x = np.array([self.x] * len(self.y)) - y = np.array([self.y] * len(self.x)).T - - # Create UTM bounds - y_b = super(MercatorGrid, self).create_bounds(y, self.inc_y, number_vertices=4, inverse=True) - x_b = super(MercatorGrid, self).create_bounds(x, self.inc_x, number_vertices=4) - - # Create the LCC projection - projection = Proj(self.crs) - - # UTM to Mercator - self.center_longitudes, self.center_latitudes = projection(x, y, inverse=True) - self.boundary_longitudes, self.boundary_latitudes = projection(x_b, y_b, inverse=True) - - settings.write_time('MercatorGrid', 'create_coords', timeit.default_timer() - st_time, level=3) - - -if __name__ == '__main__': - pass diff --git a/hermesv3_bu/modules/grids/grid_rotated.py b/hermesv3_bu/modules/grids/grid_rotated.py deleted file mode 100644 index 856630075c7ed241f39dbe6818bf0dae9fe8b18c..0000000000000000000000000000000000000000 --- a/hermesv3_bu/modules/grids/grid_rotated.py +++ /dev/null @@ -1,237 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Earth Sciences Department, BSC-CNS -# -# This file is part of HERMESv3_GR. -# -# HERMESv3_GR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# HERMESv3_GR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with HERMESv3_GR. If not, see . - - -import sys -import os -import timeit -import hermesv3_gr.config.settings as settings -from grid import Grid - - -class RotatedGrid(Grid): - # TODO Rotated options description - """ - :param grid_type: Type of the output grid [global, rotated, lcc, mercator]. - :type grid_type: str - - :param vertical_description_path: Path to the file that contains the vertical description. - :type vertical_description_path: str - - - :param timestep_num: Number of timesteps. - :type timestep_num: int - """ - - def __init__(self, grid_type, vertical_description_path, timestep_num, temporal_path, centre_lat, centre_lon, - west_boundary, south_boundary, inc_rlat, inc_rlon): - import ESMF - - st_time = timeit.default_timer() - settings.write_log('\tCreating Rotated grid.', level=2) - - # Initialises with parent class - super(RotatedGrid, self).__init__(grid_type, vertical_description_path, temporal_path) - - # Setting parameters - self.new_pole_longitude_degrees = -180 + centre_lon - self.new_pole_latitude_degrees = centre_lat # 90 - centre_lat - self.centre_lat = centre_lat - self.centre_lon = centre_lon - self.west_boundary = west_boundary # + inc_rlon #/ 2 - self.south_boundary = south_boundary # + inc_rlat #/ 2 - self.inc_rlat = inc_rlat - self.inc_rlon = inc_rlon - self.n_lat = int((abs(south_boundary) / inc_rlat) * 2 + 1) - self.n_lon = int((abs(west_boundary) / inc_rlon) * 2 + 1) - - # Rotated coordinates - self.rlat = None - self.rlon = None - - # Create coordinates - self.crs = {'init': 'epsg:4326'} - self.create_coords() - - if not os.path.exists(self.coords_netcdf_file): - if settings.rank == 0: - # super(RotatedGrid, self).write_coords_netcdf() - self.write_coords_netcdf() - settings.comm.Barrier() - - # self.write_coords_netcdf() - - self.esmf_grid = super(RotatedGrid, self).create_esmf_grid_from_file(self.coords_netcdf_file, sphere=False) - - self.x_lower_bound = self.esmf_grid.lower_bounds[ESMF.StaggerLoc.CENTER][1] - self.x_upper_bound = self.esmf_grid.upper_bounds[ESMF.StaggerLoc.CENTER][1] - self.y_lower_bound = self.esmf_grid.lower_bounds[ESMF.StaggerLoc.CENTER][0] - self.y_upper_bound = self.esmf_grid.upper_bounds[ESMF.StaggerLoc.CENTER][0] - - self.shape = (timestep_num, len(self.vertical_description), self.x_upper_bound-self.x_lower_bound, - self.y_upper_bound-self.y_lower_bound) - - self.cell_area = self.get_cell_area()[self.x_lower_bound:self.x_upper_bound, - self.y_lower_bound:self.y_upper_bound] - - settings.write_time('RotatedGrid', 'Init', timeit.default_timer() - st_time, level=1) - - def create_coords(self): - """ - Create the coordinates for a rotated domain. - """ - from hermesv3_gr.tools.coordinates_tools import create_regular_rotated - import numpy as np - - st_time = timeit.default_timer() - settings.write_log('\t\tCreating rotated coordinates.', level=3) - - # Create rotated coordinates - (self.rlat, self.rlon, br_lats_single, br_lons_single) = create_regular_rotated( - self.south_boundary, self.west_boundary, self.inc_rlat, self.inc_rlon, self.n_lat, self.n_lon) - if len(self.rlon)//2 < settings.size: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise AttributeError("ERROR: Maximum number of processors exceeded. 
" + - "It has to be less or equal than {0}.".format(len(self.rlon)//2)) - sys.exit(1) - # 1D to 2D - c_lats = np.array([self.rlat] * len(self.rlon)).T - c_lons = np.array([self.rlon] * len(self.rlat)) - - # Create rotated boundary coordinates - b_lats = super(RotatedGrid, self).create_bounds(c_lats, self.inc_rlat, number_vertices=4, inverse=True) - b_lons = super(RotatedGrid, self).create_bounds(c_lons, self.inc_rlon, number_vertices=4) - - # Rotated to Lat-Lon - self.boundary_longitudes, self.boundary_latitudes = self.rotated2latlon(b_lons, b_lats) - self.center_longitudes, self.center_latitudes = self.rotated2latlon(c_lons, c_lats) - - settings.write_time('RotatedGrid', 'create_coords', timeit.default_timer() - st_time, level=2) - - def rotated2latlon(self, lon_deg, lat_deg, lon_min=-180): - """ - Calculate the unrotated coordinates using the rotated ones. - - :param lon_deg: Rotated longitude coordinate. - :type lon_deg: numpy.array - - :param lat_deg: Rotated latitude coordinate. - :type lat_deg: numpy.array - - :param lon_min: Minimum value for the longitudes: -180 (-180 to 180) or 0 (0 to 360) - :type lon_min: float - - :return: Unrotated coordinates. Longitudes, Latitudes - :rtype: tuple(numpy.array, numpy.array) - """ - import numpy as np - import math - - st_time = timeit.default_timer() - settings.write_log('\t\t\tTransforming rotated coordinates to latitude, longitude coordinates.', level=3) - - # TODO Document this function - degrees_to_radians = math.pi / 180. - # radians_to_degrees = 180. / math.pi - - # Positive east to negative east - # self.new_pole_longitude_degrees -= 180 - - tph0 = self.new_pole_latitude_degrees * degrees_to_radians - tlm = lon_deg * degrees_to_radians - tph = lat_deg * degrees_to_radians - tlm0d = self.new_pole_longitude_degrees - ctph0 = np.cos(tph0) - stph0 = np.sin(tph0) - - stlm = np.sin(tlm) - ctlm = np.cos(tlm) - stph = np.sin(tph) - ctph = np.cos(tph) - - # Latitude - sph = (ctph0 * stph) + (stph0 * ctph * ctlm) - # if sph > 1.: - # sph = 1. - # if sph < -1.: - # sph = -1. - # print type(sph) - sph[sph > 1.] = 1. - sph[sph < -1.] = -1. - - aph = np.arcsin(sph) - aphd = aph / degrees_to_radians - - # Longitude - anum = ctph * stlm - denom = (ctlm * ctph - stph0 * sph) / ctph0 - relm = np.arctan2(anum, denom) - math.pi - almd = relm / degrees_to_radians + tlm0d - - # if almd < min_lon: - # almd += 360 - # elif almd > max_lon: - # almd -= 360 - almd[almd > (lon_min + 360)] -= 360 - almd[almd < lon_min] += 360 - - settings.write_time('RotatedGrid', 'rotated2latlon', timeit.default_timer() - st_time, level=3) - - return almd, aphd - - def write_coords_netcdf(self): - """ - Writes the temporal file with the coordinates of the output needed to generate the weight matrix. - If it is already well created it will only add the cell_area parameter. - """ - from hermesv3_gr.modules.writing.writer import Writer - - st_time = timeit.default_timer() - settings.write_log('\tWriting {0} file.'.format(self.coords_netcdf_file), level=3) - - if not self.chech_coords_file(): - # Writes an auxiliary empty NetCDF only with the coordinates and an empty variable. 
- Writer.write_netcdf(self.coords_netcdf_file, self.center_latitudes, self.center_longitudes, - [{'name': 'var_aux', 'units': '', 'data': 0}], - boundary_latitudes=self.boundary_latitudes, - boundary_longitudes=self.boundary_longitudes, - roated=True, rotated_lats=self.rlat, rotated_lons=self.rlon, - north_pole_lat=self.new_pole_latitude_degrees, - north_pole_lon=self.new_pole_longitude_degrees) - - # Calculate the cell area of the auxiliary NetCDF file - self.cell_area = self.get_cell_area() - - # Re-writes the NetCDF adding the cell area - Writer.write_netcdf(self.coords_netcdf_file, self.center_latitudes, self.center_longitudes, - [{'name': 'var_aux', 'units': '', 'data': 0}], - boundary_latitudes=self.boundary_latitudes, - boundary_longitudes=self.boundary_longitudes, cell_area=self.cell_area, - roated=True, rotated_lats=self.rlat, rotated_lons=self.rlon, - north_pole_lat=self.new_pole_latitude_degrees, - north_pole_lon=self.new_pole_longitude_degrees) - else: - self.cell_area = self.get_cell_area() - - settings.write_time('RotatedGrid', 'write_coords_netcdf', timeit.default_timer() - st_time, level=3) - - -if __name__ == '__main__': - pass diff --git a/hermesv3_bu/modules/masking/masking.py b/hermesv3_bu/modules/masking/masking.py deleted file mode 100644 index 36b1c93f85a709e9e5e7c771e9c39b9a3248ae09..0000000000000000000000000000000000000000 --- a/hermesv3_bu/modules/masking/masking.py +++ /dev/null @@ -1,301 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Earth Sciences Department, BSC-CNS -# -# This file is part of HERMESv3_GR. -# -# HERMESv3_GR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# HERMESv3_GR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with HERMESv3_GR. If not, see . - - -import os -import timeit -from warnings import warn as warning -import hermesv3_gr.config.settings as settings - - -class Masking(object): - """ - Masking object to apply simple mask or factor mask. - - :param world_info: Path to the file that contains the ISO Codes and other relevant information. - :type world_info: str - - :param factors_mask_values: List of the factor mask values. - :type factors_mask_values: list - - :param regrid_mask_values: List of the mask values. - :type regrid_mask_values: list - - :param grid: Grid. 
- :type grid: Grid - - :param world_mask_file: - :type world_mask_file: str - """ - - def __init__(self, world_info, factors_mask_values, regrid_mask_values, grid, world_mask_file=None): - from timezonefinder import TimezoneFinder - - st_time = timeit.default_timer() - settings.write_log('\t\tCreating mask.', level=2) - - self.adding = None - self.world_info = world_info - self.country_codes = self.get_country_codes() - self.world_mask_file = world_mask_file - self.factors_mask_values = self.parse_factor_values(factors_mask_values) - self.regrid_mask_values = self.parse_masking_values(regrid_mask_values) - self.regrid_mask = None - self.scale_mask = None - self.timezonefinder = TimezoneFinder() - - self.grid = grid - - settings.write_time('Masking', 'Init', timeit.default_timer() - st_time, level=3) - - def get_country_codes(self): - """ - Get the country code information. - - :return: Dictionary of country codes. - :rtype: dict - """ - import pandas as pd - - st_time = timeit.default_timer() - - dataframe = pd.read_csv(self.world_info, sep=';') - del dataframe['time_zone'], dataframe['time_zone_code'] - dataframe = dataframe.drop_duplicates().dropna() - dataframe = dataframe.set_index('country_code_alpha') - countries_dict = dataframe.to_dict() - countries_dict = countries_dict['country_code'] - - settings.write_time('Masking', 'get_country_codes', timeit.default_timer() - st_time, level=3) - return countries_dict - - @staticmethod - def partlst(lst, num): - """ - Split a Array in N balanced arrays. - - :param lst: Array to split - :type lst: numpy.array - - :param num: Number of mini arrays. - :type num: int - - :return: Array - :type: numpy.array - """ - import itertools - # Partition @lst in @n balanced parts, in given order - parts, rest = divmod(len(lst), num) - lstiter = iter(lst) - for j in xrange(num): - plen = len(lst) / num + (1 if rest > 0 else 0) - rest -= 1 - yield list(itertools.islice(lstiter, plen)) - - def create_country_iso(self, in_nc): - import numpy as np - from hermesv3_gr.tools.netcdf_tools import extract_vars - from hermesv3_gr.modules.writing.writer import Writer - - st_time = timeit.default_timer() - settings.write_log('\t\t\tCreating {0} file.'.format(self.world_mask_file), level=2) - # output_path = os.path.join(output_dir, 'iso.nc') - - lat_o, lon_o = extract_vars(in_nc, ['lat', 'lon']) - lon = np.array([lon_o['data']] * len(lat_o['data'])) - lat = np.array([lat_o['data']] * len(lon_o['data'])).T - - dst_var = [] - num = 0 - points = np.array(zip(lat.flatten(), lon.flatten())) - - points_list = list(self.partlst(points, settings.size)) - - for lat_aux, lon_aux in points_list[settings.rank]: - num += 1 - - settings.write_log("\t\t\t\tlat:{0}, lon:{1} ({2}/{3})".format( - lat_aux, lon_aux, num, len(points_list[settings.rank])), level=3) - - tz = self.find_timezone(lat_aux, lon_aux) - tz_id = self.get_iso_code_from_tz(tz) - dst_var.append(tz_id) - dst_var = np.array(dst_var) - dst_var = settings.comm.gather(dst_var, root=0) - - if settings.rank == 0: - dst_var = np.concatenate(dst_var) - dst_var = dst_var.reshape((1,) + lat.shape) - data = [{ - 'name': 'timezone_id', - 'units': '', - 'data': dst_var, - }] - Writer.write_netcdf(self.world_mask_file, lat, lon, data, regular_latlon=True) - settings.comm.Barrier() - - settings.write_time('Masking', 'create_country_iso', timeit.default_timer() - st_time, level=3) - - return True - - def find_timezone(self, latitude, longitude): - - st_time = timeit.default_timer() - - if longitude < -180: - longitude += 360 - elif 
longitude > +180: - longitude -= 360 - - tz = self.timezonefinder.timezone_at(lng=longitude, lat=latitude) - - settings.write_time('Masking', 'find_timezone', timeit.default_timer() - st_time, level=3) - - return tz - - def get_iso_code_from_tz(self, tz): - import pandas as pd - - st_time = timeit.default_timer() - - zero_values = [None, ] - if tz in zero_values: - return 0 - - df = pd.read_csv(self.world_info, sep=';') - code = df.country_code[df.time_zone == tz].values - - settings.write_time('Masking', 'get_iso_code_from_tz', timeit.default_timer() - st_time, level=3) - - return code[0] - - def parse_factor_values(self, values): - """ - - :param values: - :return: - :rtype: dict - """ - import re - - st_time = timeit.default_timer() - - if type(values) != str: - return None - values = list(map(str, re.split(' , |, | ,|,', values))) - scale_dict = {} - for element in values: - element = list(map(str, re.split("{0}{0}|{0}".format(' '), element))) - scale_dict[int(self.country_codes[element[0]])] = element[1] - - settings.write_log('\t\t\tApplying scaling factors for {0}.'.format(values), level=3) - settings.write_time('Masking', 'parse_factor_values', timeit.default_timer() - st_time, level=3) - - return scale_dict - - def parse_masking_values(self, values): - """ - - :param values: - :return: - :rtype: list - """ - import re - - st_time = timeit.default_timer() - - if type(values) != str: - return None - values = list(map(str, re.split(' , |, | ,|,| ', values))) - if values[0] == '+': - self.adding = True - elif values[0] == '-': - self.adding = False - else: - if len(values) > 0: - settings.write_log('WARNING: Check the .err file to get more info. Ignoring mask') - if settings.rank == 0: - warning("WARNING: The list of masking does not start with '+' or '-'. 
Ignoring mask.") - return None - code_list = [] - for country in values[1:]: - code_list.append(int(self.country_codes[country])) - - if self.adding: - settings.write_log("\t\t\tCreating mask to do {0} countries.".format(values[1:]), level=3) - else: - settings.write_log("\t\t\tCreating mask to avoid {0} countries.".format(values[1:]), level=3) - settings.write_time('Masking', 'parse_masking_values', timeit.default_timer() - st_time, level=3) - - return code_list - - def check_regrid_mask(self, input_file): - - if self.regrid_mask_values is not None: - if not os.path.exists(self.world_mask_file): - self.create_country_iso(input_file) - self.regrid_mask = self.custom_regrid_mask() - if self.factors_mask_values is not None: - if not os.path.exists(self.world_mask_file): - self.create_country_iso(input_file) - self.scale_mask = self.custom_scale_mask() - - def custom_regrid_mask(self): - import numpy as np - from netCDF4 import Dataset - - st_time = timeit.default_timer() - - netcdf = Dataset(self.world_mask_file, mode='r') - values = netcdf.variables['timezone_id'][:] - netcdf.close() - - if self.adding: - mask = np.zeros(values.shape) - for code in self.regrid_mask_values: - mask[values == code] = 1 - else: - mask = np.ones(values.shape) - for code in self.regrid_mask_values: - mask[values == code] = 0 - - settings.write_time('Masking', 'custom_regrid_mask', timeit.default_timer() - st_time, level=3) - - return mask - - def custom_scale_mask(self): - import numpy as np - from hermesv3_gr.tools.netcdf_tools import extract_vars - - st_time = timeit.default_timer() - - [values] = extract_vars(self.world_mask_file, ['timezone_id']) - - values = values['data'] - mask = np.ones(values.shape) - for code, factor in self.factors_mask_values.iteritems(): - mask[values == code] = factor - - settings.write_time('Masking', 'custom_scale_mask', timeit.default_timer() - st_time, level=3) - - return mask - - -if __name__ == '__main__': - pass diff --git a/hermesv3_bu/modules/point_source/point_source.py b/hermesv3_bu/modules/point_source/point_source.py deleted file mode 100644 index 2d5fa6ac05b5ab528570d3981d37fe2d633a9a94..0000000000000000000000000000000000000000 --- a/hermesv3_bu/modules/point_source/point_source.py +++ /dev/null @@ -1,719 +0,0 @@ -#!/usr/bin/env python - -import sys -import os -from timeit import default_timer as gettime -# import pandas as pd -# import geopandas as gpd -# import numpy as np -# from shapely.ops import nearest_points -# import warnings -import IN.src.config.settings as settings - -# TODO some pollutants - - -class PointSource(object): - """ - Class to calculate the Point Source emissions - - :param grid: Grid of the destination domain - :type grid: Grid - - :param catalog_path: Path to the fine that contains all the information for each point source. - :type catalog_path: str - - :param monthly_profiles_path: Path to the file that contains the monthly profiles. - :type monthly_profiles_path: str - - :param daily_profiles_path: Path to the file that contains the daily profiles. - :type daily_profiles_path: str - - :param hourly_profiles_path: Path to the file that contains the hourly profile. - :type hourly_profiles_path: str - - :param speciation_map_path: Path to the file that contains the speciation map. - :type speciation_map_path: str - - :param speciation_profiles_path: Path to the file that contains the speciation profiles. - :type speciation_profiles_path: str - - :param sector_list: List os sectors (SNAPS) to take into account. 
01, 03, 04, 09 - :type sector_list: list - """ - def __init__(self, grid, catalog_path, monthly_profiles_path, daily_profiles_path, hourly_profiles_path, - speciation_map_path, speciation_profiles_path, sector_list, effective_stack_height, pollutant_list, - measured_emission_path, molecular_weights_path=None): - import pandas as pd - - self.pollutant_list = pollutant_list - - self.catalog = self.read_catalog(catalog_path, sector_list) - self.catalog_measured = self.read_catalog_for_measured_emissions(catalog_path, sector_list) - self.measured_path = measured_emission_path - - self.grid = grid - - self.monthly_profiles = self.read_monthly_profiles(monthly_profiles_path) - self.daily_profiles = self.read_daily_profiles(daily_profiles_path) - self.hourly_profiles = self.read_hourly_profiles(hourly_profiles_path) - - self.speciation_map = self.read_speciation_map(speciation_map_path) - self.speciation_profiles = self.read_speciation_profiles(speciation_profiles_path) - self.effective_stack_height = effective_stack_height - - self.molecular_weigths = pd.read_csv(molecular_weights_path, sep=';') - - @staticmethod - def read_speciation_map(path): - """ - Read the Dataset of the speciation map. - - :param path: Path to the file that contains the speciation map. - :type path: str - - :return: Dataset of the speciation map. - :rtype: pandas.DataFrame - """ - import pandas as pd - - speciation_map = pd.read_csv(path, sep=';') - - return speciation_map - - @staticmethod - def read_speciation_profiles(path): - """ - Read the Dataset of the speciation profiles. - - :param path: Path to the file that contains the speciation profiles. - :type path: str - - :return: Dataset of the speciation profiles. - :rtype: pandas.DataFrame - """ - import pandas as pd - - profiles = pd.read_csv(path, sep=',') - - return profiles - - @staticmethod - def read_monthly_profiles(path): - """ - Read the Dataset of the monthly profiles with the month number as columns. - - :param path: Path to the file that contains the monthly profiles. - :type path: str - - :return: Dataset of the monthly profiles. - :rtype: pandas.DataFrame - """ - import pandas as pd - - profiles = pd.read_csv(path) - - profiles.rename(columns={'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6, 'July': 7, - 'August': 8, 'September': 9, 'October': 10, 'November': 11, 'December': 12}, - inplace=True) - return profiles - - @staticmethod - def read_daily_profiles(path): - """ - Read the Dataset of the daily profiles with the days as numbers (Monday: 0 - Sunday: 6) as columns. - - :param path: Path to the file that contains the daily profiles. - :type path: str - - :return: Dataset of the daily profiles. - :rtype: pandas.DataFrame - """ - import pandas as pd - - profiles = pd.read_csv(path) - - profiles.rename(columns={'Monday': 0, 'Tuesday': 1, 'Wednesday': 2, 'Thursday': 3, 'Friday': 4, 'Saturday': 5, - 'Sunday': 6, }, inplace=True) - return profiles - - @staticmethod - def read_hourly_profiles(path): - """ - Read the Dataset of the hourly profiles with the hours (int) as columns. - - :param path: Path to the file that contains the hourly profiles. - :type path: str - - :return: Dataset of the hourly profiles.
- :rtype: pandas.DataFrame - """ - import pandas as pd - - profiles = pd.read_csv(path) - profiles.rename(columns={'P_hour': -1, '00': 0, '01': 1, '02': 2, '03': 3, '04': 4, '05': 5, '06': 6, '07': 7, - '08': 8, '09': 9, '10': 10, '11': 11, '12': 12, '13': 13, '14': 14, '15': 15, '16': 16, - '17': 17, '18': 18, '19': 19, '20': 20, '21': 21, '22': 22, '23': 23}, inplace=True) - profiles.columns = profiles.columns.astype(int) - profiles.rename(columns={-1: 'P_hour'}, inplace=True) - return profiles - - def read_catalog(self, catalog_path, sector_list): - """ - Read the catalog - - :param catalog_path: path to the catalog - :type catalog_path: str - - :param sector_list: List of sectors to take into account - :type sector_list: list - - :return: catalog - :rtype: pandas.DataFrame - """ - import pandas as pd - import numpy as np - - columns = {"Code": np.str, "Cons": np.bool, "SNAP": np.str, "Lon": settings.precision, - "Lat": settings.precision, "Height": settings.precision, "AF": settings.precision, - "P_month": np.str, "P_week": np.str, "P_hour": np.str, "P_spec": np.str} - for pollutant in self.pollutant_list: - columns['EF_{0}'.format(pollutant)] = settings.precision - - catalog_df = pd.read_csv(catalog_path, usecols=columns.keys(), dtype=columns) - - # Filtering - catalog_df = catalog_df.loc[catalog_df['Cons'] == 1, :] - catalog_df.drop('Cons', axis=1, inplace=True) - - # Filtering - catalog_df = catalog_df.loc[catalog_df['AF'] != -1, :] - - if sector_list is not None: - catalog_df = catalog_df.loc[catalog_df['SNAP'].str[:2].isin(sector_list)] - catalog_df.drop('SNAP', axis=1, inplace=True) - - # TODO Select only involved point sources in the working domain - - return catalog_df - - def read_catalog_for_measured_emissions(self, catalog_path, sector_list): - """ - Read the catalog - - :param catalog_path: path to the catalog - :type catalog_path: str - - :param sector_list: List of sectors to take into account - :type sector_list: list - - :return: catalog - :rtype: pandas.DataFrame - """ - import pandas as pd - import numpy as np - - columns = {"Code": np.str, "Cons": np.bool, "SNAP": np.str, "Lon": settings.precision, - "Lat": settings.precision, "Height": settings.precision, "AF": settings.precision, "P_spec": np.str} - # for pollutant in self.pollutant_list: - # columns['EF_{0}'.format(pollutant)] = settings.precision - - catalog_df = pd.read_csv(catalog_path, usecols=columns.keys(), dtype=columns) - - # Filtering - catalog_df = catalog_df.loc[catalog_df['Cons'] == 1, :] - catalog_df.drop('Cons', axis=1, inplace=True) - - # Filtering - catalog_df = catalog_df.loc[catalog_df['AF'] == -1, :] - catalog_df.drop('AF', axis=1, inplace=True) - - if sector_list is not None: - catalog_df = catalog_df.loc[catalog_df['SNAP'].str[:2].isin(sector_list)] - catalog_df.drop('SNAP', axis=1, inplace=True) - - # TODO Select only involved point sources in the working domain - - return catalog_df - - @staticmethod - def to_geodataframe(catalog): - """ - Convert a simple DataFrame with Lat, Lon columns into a GeoDataFrame as a shape - - :param catalog: DataFrame with all the information of each point source. - :type catalog: pandas.DataFrame - - :return: GeoDataFrame with all the information of each point source. 
- :rtype: geopandas.GeoDataFrame - """ - import geopandas as gpd - from shapely.geometry import Point - - geometry = [Point(xy) for xy in zip(catalog.Lon, catalog.Lat)] - catalog.drop(['Lon', 'Lat'], axis=1, inplace=True) - crs = {'init': 'epsg:4326'} - catalog = gpd.GeoDataFrame(catalog, crs=crs, geometry=geometry) - - # catalog.to_file('/home/Earth/ctena/Models/HERMESv3/OUT/test/point_source.shp') - - return catalog - - def add_dates(self, catalog, st_date_utc, delta_hours): - """ - Add to the catalog the 'date' column (in local time) and the time step ('tstep'). - - :param catalog: Catalog to update - :type catalog: pandas.DataFrame - - :param st_date_utc: Starting date in UTC. - :type st_date_utc: datetime.datetime - - :param delta_hours: List of hours that have to sum to the first hour for each time step. - :type delta_hours: list - - :return: Catalog with the dates - :rtype: pandas.DataFrame - """ - from datetime import timedelta - import pandas as pd - - catalog = self.add_timezone(catalog) - - list_catalogs = [] - for index, hour in enumerate(delta_hours): - catalog_aux = catalog.copy() - catalog_aux['date'] = pd.to_datetime(st_date_utc + timedelta(hours=hour), utc=True) - catalog_aux['tstep'] = index - list_catalogs.append(catalog_aux) - - catalog = pd.concat(list_catalogs) - catalog.reset_index(drop=True, inplace=True) - - catalog = self.to_timezone(catalog) - - return catalog - - @staticmethod - def add_timezone(catalog): - """ - Add the timezone column with the timezone of the location of each point source. - - :param catalog: Catalog where add the timezone. - :type catalog: pandas.DataFrame - - :return: Catalog with the added timezone column. - :rtype: pandas.DataFrame - """ - from timezonefinder import TimezoneFinder - - tzfinder = TimezoneFinder() - - catalog['timezone'] = catalog['geometry'].apply(lambda x: tzfinder.timezone_at(lng=x.x, lat=x.y)) - - return catalog - - @staticmethod - def to_timezone(catalog): - """ - Set the local date with the correspondent timezone substituting the UTC date. - - :param catalog: Catalog with the UTC date column. - :type catalog: pandas.DataFrame - - :return: Catalog with the local date column. - :rtype: pandas.DataFrame - """ - import pandas as pd - - catalog['date'] = catalog.groupby('timezone')['date'].apply(lambda x: x.dt.tz_convert(x.name).dt.tz_localize(None)) - - catalog.drop('timezone', axis=1, inplace=True) - - return catalog - - def get_yearly_emissions(self, catalog): - """ - Calculate yearly emissions. - - :param catalog: Catalog with the activity factor (AF) column and all the emission factor column for each - pollutant. - :type catalog: pandas.DataFrame - - :return: Catalog with yearly emissions of each point source for all the pollutants (as column names). - :rtype: pandas.DataFrame - """ - for pollutant in self.pollutant_list: - catalog.rename(columns={u'EF_{0}'.format(pollutant): pollutant}, inplace=True) - catalog[pollutant] = catalog[pollutant] * catalog['AF'] - - catalog.drop('AF', axis=1, inplace=True) - return catalog - - @staticmethod - def calculate_rebalance_factor(profile, date): - """ - Calculate the necessary factor to make consistent the full month data. This is needed for the months that if you - sum the daily factor of each day of the month it doesn't sum as the number of days of the month. - - :param profile: Daily profile. - :type profile: dict - - :param date: Date of the timestep to simulate. - :type date: datetime.datetime - - :return: Dataset with the corrected values for the daily profiles. 
- :rtype: pandas.DataFrame - """ - import pandas as pd - weekdays = PointSource.calculate_weekdays(date) - rebalanced_profile = PointSource.calculate_weekday_factor_full_month(profile, weekdays) - rebalanced_profile = pd.DataFrame.from_dict(rebalanced_profile) - - return rebalanced_profile - - @staticmethod - def calculate_weekday_factor_full_month(profile, weekdays): - """ - Operate with all the days of the month to get the sum of daily factors of the full month. - - :param profile: input profile - :type profile: dict - - :param weekdays: Dictionary with the number of days of each day type (Monday, Tuesday, ...) - :type weekdays: dict - - :return: Dictionary with the corrected profile. - :rtype: dict - """ - weekdays_factors = 0 - num_days = 0 - for day in xrange(7): - weekdays_factors += profile[day] * weekdays[day] - num_days += weekdays[day] - increment = (num_days - weekdays_factors) / num_days - - for day in xrange(7): - profile[day] = [(increment + profile[day]) / num_days] - - return profile - - @staticmethod - def calculate_weekdays(date): - """ - Calculate the number of days of each day type for the given month of the year. - - :param date: Date to select the month to evaluate. - :type date: datetime.datetime - - :return: Dictionary with the number of days of each day type (Monday, Tuesday, ...) - :rtype: dict - """ - from calendar import monthrange, weekday, MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY - weekdays = [MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY] - days = [weekday(date.year, date.month, d + 1) for d in xrange(monthrange(date.year, date.month)[1])] - - weekdays_dict = {} - for i, day in enumerate(weekdays): - weekdays_dict[i] = days.count(day) - - return weekdays_dict - - def get_temporal_factors(self, catalog, st_date): - """ - Calculates the temporal factor for each point source and each time step. - - :param catalog: Catalog with the activity factor (AF) column and all the emission factor column for each - pollutant. - :type catalog: pandas.DataFrame - - :return: Catalog with yearly emissions of each point source for all the pollutants (as column names). - :rtype: pandas.DataFrame - """ - import pandas as pd - - def set_monthly_profile(x): - """ - Extracts the monthly profile for the given Series. - - :param x: Series to modify - :type x: pandas.Series - - :return: Modified Series. - :rtype: pandas.Series - """ - profile = self.monthly_profiles[self.monthly_profiles['P_month'] == x.name] - for month, aux_df in x.groupby(x.dt.month): - x.loc[aux_df.index] = profile[month].values[0] - return x - - def set_daily_profile(x, st_date): - """ - Extracts the daily profile for the given Series and correct them with the rebalanced factor. - - :param x: Series to modify - :type x: pandas.Series - - :param st_date: Date to evaluate. Necessary for rebalanced the factor. - :type st_date: datetime.datetime - - :return: Modified Series. - :rtype: pandas.Series - """ - profile = self.daily_profiles[self.daily_profiles['P_week'] == x.name] - profile = self.calculate_rebalance_factor(profile.to_dict('records')[0], st_date) - - for weekday, aux_df in x.groupby(x.dt.weekday): - x.loc[aux_df.index] = profile[weekday].values[0] - return x - - def set_hourly_profile(x): - """ - Extracts the hourly profile for the given Series. - - :param x: Series to modify - :type x: pandas.Series - - :return: Modified Series. 
- :rtype: pandas.Series - """ - profile = self.hourly_profiles[self.hourly_profiles['P_hour'] == x.name] - for hour, aux_df in x.groupby(x.dt.hour): - x.loc[aux_df.index] = profile[hour].values[0] - return x - - catalog['P_month'] = catalog.groupby('P_month')['date'].apply(set_monthly_profile) - catalog['P_week'] = catalog.groupby('P_week')['date'].apply(lambda x: set_daily_profile(x, st_date)) - catalog['P_hour'] = catalog.groupby('P_hour')['date'].apply(set_hourly_profile) - - catalog['temp_factor'] = catalog['P_month'] * catalog['P_week'] * catalog['P_hour'] - catalog.drop(['P_month', 'P_week', 'P_hour'], axis=1, inplace=True) - - for pollutant in self.pollutant_list: - catalog[pollutant] = catalog[pollutant] * catalog['temp_factor'] - catalog.drop('temp_factor', axis=1, inplace=True) - - return catalog - - def calculate_hourly_emissions(self, catalog, st_date): - """ - Calculate the hourly emissions - - :param catalog: Catalog to calculate. - :type catalog: pandas.DataFrame - - :param st_date: Starting date to simulate (UTC). - :type st_date: dateitme.datetime - - :return: Catalog with the hourly emissions. - :rtype: pandas.DataFrame - """ - - catalog = self.get_yearly_emissions(catalog) - catalog = self.get_temporal_factors(catalog, st_date) - - return catalog - - def calculate_vertical_distribution(self, catalog, vertical_levels): - """ - Add the layer column to indicate at what layer the emission have to go. - - :param catalog: Catalog to calculate. - :type catalog: pandas.DataFrame - - :param vertical_levels: List with the maximum altitude of each layer in meters. - :type vertical_levels: list - - :return: Catalog with the level. - :rtype: pandas.DataFrame - """ - import numpy as np - - if self.effective_stack_height: - catalog['Height'] = catalog['Height'] * 1.2 - - catalog['layer'] = np.searchsorted(vertical_levels, catalog['Height'], side='left') - - catalog.drop('Height', axis=1, inplace=True) - - return catalog - - def speciate(self, catalog): - """ - Speciate the catalog for the output pollutants. - - :param catalog: Catalog to speciate. - :type catalog: pandas.DataFrame - - :return: Speciated catalog. - :rtype: pandas.DataFrame - """ - import pandas as pd - import numpy as np - - def do_speciation(x, input_pollutant, output_pollutant): - """ - Do the speciation for a specific pollutant. - - :param x: Serie with the pollutant to specieate. - :type x: pandas.Series - - :param input_pollutant: Name of the input pollutant. - :type input_pollutant: str - - :param output_pollutant: Name of the output pollutant. 
- :type output_pollutant: str - - :return: Speciated Series - :rtype: pandas.Series - """ - mol_weight = self.molecular_weigths.loc[self.molecular_weigths['Specie'] == input_pollutant, 'MW'].values[0] - - profile = self.speciation_profiles[self.speciation_profiles['P_spec'] == x.name] - if output_pollutant == 'PMC': - x = catalog.loc[x.index, 'pm10'] - catalog.loc[x.index, 'pm25'] - - if input_pollutant == 'nmvoc': - x = x * profile['VOCtoTOG'].values[0] * (profile[output_pollutant].values[0] / mol_weight) - else: - x = x * (profile[output_pollutant].values[0] / mol_weight) - return x - - speciated_catalog = catalog.drop(self.pollutant_list, axis=1) - - for out_p in self.speciation_map['dst'].values: - in_p = self.speciation_map.loc[self.speciation_map['dst'] == out_p, 'src'].values[0] - if type(in_p) == float and np.isnan(in_p): - in_p = 'pm10' - speciated_catalog[out_p] = catalog.groupby('P_spec')[in_p].apply(lambda x: do_speciation(x, in_p, out_p)) - - speciated_catalog.drop('P_spec', axis=1, inplace=True) - - return speciated_catalog - - def add_measured_emissions(self, catalog, st_date, delta_hours): - def func(x, pollutant): - import pandas as pd - from datetime import timedelta - measured_emissions = self.measured_path.replace('', x.name) - measured_emissions = pd.read_csv(measured_emissions, sep=';') - measured_emissions = measured_emissions.loc[measured_emissions['Code'] == x.name, :] - - measured_emissions['date'] = pd.to_datetime(measured_emissions['date']) + pd.to_timedelta( - measured_emissions['local_to_UTC'], unit='h') - - measured_emissions.drop('local_to_UTC', axis=1, inplace=True) - - # dates_array = [st_date + timedelta(hours=hour) for hour in delta_hours] - # measured_emissions = measured_emissions.loc[measured_emissions['date'].isin(dates_array), :] - code = x.name - x = pd.DataFrame(x) - x.rename(columns={code: 'date'}, inplace=True) - - test = pd.merge(left=x, right=measured_emissions.loc[:, ['date', pollutant]], on='date', how='inner') - test.set_index(x.index, inplace=True) - - return test[pollutant] - for pollutant in self.pollutant_list: - catalog[pollutant] = catalog.groupby('Code')['date'].apply(lambda x: func(x, pollutant)) - - return catalog - - def calculate_measured_emissions(self, catalog, st_date, delta_hours): - if len(catalog) == 0: - return None - else: - catalog = self.to_geodataframe(catalog) - catalog = self.add_dates(catalog, st_date, delta_hours) - - catalog = self.add_measured_emissions(catalog, st_date, delta_hours) - - return catalog - - def merge_catalogs(self, catalog_list): - import pandas as pd - - catalog = pd.concat(catalog_list) - - catalog.reset_index(inplace=True) - - return catalog - - def calculate_point_source_emissions(self, st_date, delta_hours, vertical_levels): - """ - Process to calculate the point source emissions. - - :param st_date: Starting date to simulate (UTC). - :type st_date: datetime.datetime - - :param delta_hours: List of hours that have to sum to the first hour for each time step. - :type delta_hours: list - - :param vertical_levels: List with the maximum altitude of each layer in meters. - :type vertical_levels: list - - :return: Catalog with the calculated emissions.
- :rtype: pandas.DataFrame - """ - self.catalog = self.to_geodataframe(self.catalog) - - self.catalog = self.add_dates(self.catalog, st_date, delta_hours) - self.catalog = self.calculate_hourly_emissions(self.catalog, st_date) - - self.catalog_measured = self.calculate_measured_emissions(self.catalog_measured, st_date, delta_hours) - - if self.catalog_measured is not None: - self.catalog = self.merge_catalogs([self.catalog, self.catalog_measured]) - self.catalog = self.calculate_vertical_distribution(self.catalog, vertical_levels) - - self.catalog = self.speciate(self.catalog) - - # self.catalog['date'] = self.catalog['date'].apply(lambda x: x.strftime('%Y-%m-%d %H:%M:%S')) - # self.catalog.to_file('/home/Earth/ctena/Models/HERMESv3/OUT/test/catalog.shp') - return self.catalog - - def points_to_grid(self, catalog, grid_shape, out_list): - """ - Add the cell location for each point source and dum the values that goes to the same cell, time step and layer. - - :param catalog: Catalog to find their possition. - :type catalog: pandas.DataFrame - - :param grid_shape: Shapefile of the oputput grid. - :type grid_shape: geopandas.GeoDataFrame - - :param out_list: List of output pollutants. - :type out_list: list - - :return: List of dictionaries with the necessary information to write the netCDF. - :rtype: list - """ - import geopandas as gpd - - catalog = catalog.to_crs(grid_shape.crs) - catalog = gpd.sjoin(catalog, grid_shape, how="inner", op='intersects') - # Drops duplicates when the point source is on the boundary of the cell - catalog = catalog[~catalog.index.duplicated(keep='first')] - - try: - catalog.drop(['Code', 'index_right', 'date', 'geometry'], axis=1, inplace=True) - except ValueError: - pass - - catalog = catalog.groupby(['tstep', 'layer', 'FID']).sum() - catalog.reset_index(inplace=True) - - emission_list = [] - for out_p in out_list: - aux_data = catalog.loc[:, [out_p, 'tstep', 'layer', 'FID']] - aux_data = aux_data.loc[aux_data[out_p] > 0, :] - dict_aux = { - 'name': out_p, - 'units': '', - 'data': aux_data - } - # print dict_aux - emission_list.append(dict_aux) - - return emission_list diff --git a/hermesv3_bu/modules/traffic/traffic.py b/hermesv3_bu/modules/traffic/traffic.py deleted file mode 100644 index 8a37c27696f8a0de2094a918e1f92748dc1b5341..0000000000000000000000000000000000000000 --- a/hermesv3_bu/modules/traffic/traffic.py +++ /dev/null @@ -1,1710 +0,0 @@ -#!/usr/bin/env python - -""" -Copyright 2018 Earth Sciences Department, BSC-CNS - - This file is part of HERMESv3. - - HERMESv3 is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - HERMESv3 is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with HERMESv3. If not, see . 
-""" - -__author__ = "Carles Tena" -__copyright__ = "Copyright 2018" -__email__ = "carles.tena@bsc.es" -__license__ = "GNU General Public License" -__maintainer__ = "Carles Tena" -__version__ = "3.3.1" - -import sys -import os -from timeit import default_timer as gettime -import IN.src.config.settings as settings - -import pandas as pd -import geopandas as gpd -import numpy as np -from shapely.ops import nearest_points -import warnings - -aerosols = ['oc', 'ec', 'pno3', 'pso4', 'pmfine', 'pmc', 'poa'] -pmc_list = ['pmc', 'PMC'] -rline_shp = False - -class Traffic(object): - # TODO MARC -> to revise these descriptions - """ - The traffic class does have all the necessary functions to calculate the traffic emission in bottom-up mode. - - Part of the traffic emissions are calculated by roadlink (hot, cold, road wear, tyre wear, brake wear and - resuspension) differentiating by vehicle type. - The other emissions (other cities and evaporative) are calculated by cell instead of by road link. - - To calculate the traffic emissions some input files are needed as the shapefile that contains the information and - geolocalization of each road link, the temporal proxies, the emission factors files and also the information - relative to the timesteps. - - :param road_link_path: Path to the file that contains all the road links information as the geometry of each road. - :type road_link_path: str - - :param fleet_compo_path: ... - :type fleet_compo_path: str - - :param speed_hourly_path: ... - :type speed_hourly_path: str - - :param monthly_profile_path: Path to the file that contains all the monthly profiles needed. - :type monthly_profile_path: str - - :param daily_profile_path: Path to the file that contains all the daily profiles needed. - :type daily_profile_path: str - - :param hourly_mean_profile_path: Path to the file that contains the hourly means profiles needed. It will be used - when no other hourly profile is set or selected. - :type hourly_mean_profile_path: str - - :param hourly_weekday_profile_path: Path to the file that contains the weekday hourly profiles needed. - :type hourly_weekday_profile_path: str - - :param hourly_saturday_profile_path: Path to the file that contains the Saturday hourly profiles needed. - :type hourly_saturday_profile_path: str - - :param hourly_sunday_profile_path: Path to the file that contains the Sunday hourly profiles needed. - :type hourly_sunday_profile_path: str - - :param ef_common_path: Path to the folder that contains all the emission factor files. The name of these files are - hardcoded like '_.csv' and also the correction files 'mcorr_.csv' - :type ef_common_path: str - - :param pollutants_list: List of pollutatnts to take into account for the hot & cold emissions. The other ones are - hardcoded to be only PM10 and PM2.5. - :type pollutants_list: list - - :param date: Starting date (UTC) for the firts timestep. - :type date: datetime.datetime - - :param load: Load of the heavy vehicles. this value can be empty (0.0) medium (0.5) or full (1.0) - :type load: float - - :param timestep_type: Increment between timestep must to be 'hourly' for the traffic bottom-up emissions. - :type timestep_type: str - - :param timestep_num: number of timesteps to simulate - :type timestep_num: int - - :param timestep_freq: Frequency between timesteps. - :type timestep_freq: int - - :param temp_common_path: Path to the auxiliar folder to store the intermediate files needed. 
- :type temp_common_path: str - """ - - def __init__(self, auxiliar_path, clipping, road_link_path, fleet_compo_path, speed_hourly_path, - monthly_profile_path, daily_profile_path, hourly_mean_profile_path, hourly_weekday_profile_path, - hourly_saturday_profile_path, hourly_sunday_profile_path, ef_common_path, pollutants_list, date, - grid, vehicle_list=None, load=0.5, timestep_type='hourly', timestep_num=1, timestep_freq=1, speciation_map=None, - hot_cold_speciation=None, tyre_speciation=None, road_speciation=None, brake_speciation=None, - resuspension_speciation=None, temp_common_path=None, output_type=None, output_dir=None, - molecular_weights_path=None): - try: - settings.log_level_3 - except AttributeError: - settings.define_global_vars(0) - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - if timestep_type != 'hourly': - raise AttributeError('Traffic emissions are only developed for hourly timesteps. ' + - '\"{0}\" timestep type found.'.format(timestep_type)) - - self.output_type = output_type - self.output_dir = output_dir - - self.link_to_grid_csv = os.path.join(auxiliar_path, 'link_grid.csv') - self.crs = None # crs is the projection of the road links and it is set on the read_road_links function. - self.road_links = self.read_road_links(road_link_path, clipping, grid) - self.load = load - self.ef_common_path = ef_common_path - self.temp_common_path = temp_common_path - self.pollutant_list = pollutants_list - self.timestep_num = timestep_num - self.timestep_freq = timestep_freq - self.starting_date = date - # print date - self.add_local_date(date) - - self.speciation_map = speciation_map - self.hot_cold_speciation = hot_cold_speciation - self.tyre_speciation = tyre_speciation - self.road_speciation = road_speciation - self.brake_speciation = brake_speciation - self.resuspension_speciation = resuspension_speciation - - self.fleet_compo = self.read_fleet_compo(fleet_compo_path, vehicle_list) - self.speed_hourly = self.read_speed_hourly(speed_hourly_path) - self.monthly_profiles = pd.read_csv(monthly_profile_path) - self.daily_profiles = pd.read_csv(daily_profile_path) - self.hourly_profiles = pd.concat([ - pd.read_csv(hourly_mean_profile_path), - pd.read_csv(hourly_weekday_profile_path), - pd.read_csv(hourly_saturday_profile_path), - pd.read_csv(hourly_sunday_profile_path) - ]).reset_index() - - self.expanded = self.expand_road_links(timestep_type, timestep_num, timestep_freq) - - del self.fleet_compo, self.speed_hourly, self.monthly_profiles, self.daily_profiles, self.hourly_profiles - - self.molecular_weigths = pd.read_csv(molecular_weights_path, sep=';') - if settings.log_level_3: - print 'TIME -> Traffic.__init__: {0} s'.format(round(gettime() - st_time, 2)) - - return None - - def add_local_date(self, utc_date): - """ - Adds to the road links the starting date in local time. - This new column is called 'start_date'. - - :param utc_date: Starting date in UTC. - """ - import pytz - - self.add_timezones() - self.road_links.loc[:, 'utc'] = utc_date - self.road_links['start_date'] = self.road_links.groupby('timezone')['utc'].apply( - lambda x: pd.to_datetime(x).dt.tz_localize(pytz.utc).dt.tz_convert(x.name).dt.tz_localize(None)) - - del self.road_links['utc'], self.road_links['timezone'] - - return True - - def add_timezones(self): - """ - Finds and sets the timezone for each road link. - """ - # TODO calculate timezone from the centroid of each roadlink. 
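# One way that TODO could be implemented (an illustrative sketch only, assuming the
# road links have already been reprojected to lat-lon as done in read_road_links,
# and reusing the same TimezoneFinder package that the masking and point-source
# modules already rely on; the fallback keeps the current default for points where
# no timezone is found):
#
#     from timezonefinder import TimezoneFinder
#     tzfinder = TimezoneFinder()
#     self.road_links['timezone'] = self.road_links.centroid.apply(
#         lambda p: tzfinder.timezone_at(lng=p.x, lat=p.y) or 'Europe/Madrid')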
- - self.road_links['timezone'] = 'Europe/Madrid' - - return True - - @staticmethod - def read_speed_hourly(path): - # TODO complete description - """ - Reads the speed hourly file. - - :param path: Path to the speed hourly file. - :type path: str: - - :return: ... - :rtype: Pandas.DataFrame - """ - df = pd.read_csv(path, sep=',', dtype=np.float32) - return df - - @staticmethod - def read_fleet_compo(path, vehicle_list): - df = pd.read_csv(path, sep=',') - if vehicle_list is not None: - df = df.loc[df['Code'].isin(vehicle_list), :] - return df - - @staticmethod - def parse_clip(str_clip): - import re - from shapely.geometry import Point, Polygon - # import FileNotFoundError - if str_clip[0] == os.path.sep: - if os.path.exists(str_clip): - df_clip = gpd.read_file(str_clip) - return df_clip - else: - warnings.warn(str_clip + ' file not found. Ignoring clipping.', Warning) - return None - else: - str_clip = re.split(' , | ,|, |,', str_clip) - lon_list = [] - lat_list = [] - for components in str_clip: - components = re.split(' ', components) - lon_list.append(float(components[0])) - lat_list.append(float(components[1])) - - if not((lon_list[0] == lon_list[-1]) and (lat_list[0] == lat_list[-1])): - lon_list.append(lon_list[0]) - lat_list.append(lat_list[0]) - - df_clip = gpd.GeoDataFrame(geometry=[Polygon([[p.x, p.y] for p in [Point(xy) for xy in zip(lon_list, lat_list)]])], crs={'init': 'epsg:4326'}) - return df_clip - return None - - def read_road_links(self, path, clipping, grid): - def chunk_road_links(df, nprocs): - def index_marks(nrows, nprocs): - max_len = int(nrows // nprocs) + 1 - min_len = max_len - 1 - max_num = nrows % nprocs - min_num = nprocs - max_num - index_list = [] - prev = 0 - for i in xrange(max_num): - prev += max_len - index_list.append(prev) - if min_num > 0: - for i in xrange(min_num - 1): - prev += min_len - index_list.append(prev) - - return index_list - - def split(dfm, nprocs): - indices = index_marks(dfm.shape[0], nprocs) - return np.split(dfm, indices) - - chunks_aux = split(df, nprocs) - return chunks_aux - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - if settings.rank == 0: - df = gpd.read_file(path) - - # Clipping - if clipping is not None: - clip = self.parse_clip(clipping) - if clip is not None: - df = gpd.sjoin(df, clip.to_crs(df.crs), how="inner", op='intersects') - del clip - else: - warnings.warn('Clipping type not found . Ignoring clipping.', Warning) - clipping = None - - if clipping is None: - shape_grid = grid.to_shapefile() - clip = gpd.GeoDataFrame(geometry=[shape_grid.unary_union], crs=shape_grid.crs) - df = gpd.sjoin(df, clip.to_crs(df.crs), how="inner", op='intersects') - - # Filtering road links to CONSiderate. 
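# ('CONS' is presumably the "consider this link" flag of the input shapefile:
#  links flagged 0 are dropped, as are links with a non-positive AADT and, a few
#  lines further down, rows whose 'aadt_m_mn' monthly-profile field still holds
#  the literal string 'NULL'.)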
- df['CONS'] = df['CONS'].astype(np.int16) - df = df[df['CONS'] != 0] - df = df[df['aadt'] > 0] - - # TODO Manu update shapefile replacing NULL values on 'aadt_m-mn' column - df = df.loc[df['aadt_m_mn'] != 'NULL', :] - - # Adding identificator of road link - df['Link_ID'] = xrange(len(df)) - - del df['Adminis'], df['CCAA'], df['CONS'], df['NETWORK_ID'] - del df['Province'], df['Road_name'] - - # Deleting unused columns - del df['aadt_m_sat'], df['aadt_m_sun'], df['aadt_m_wd'], df['Source'] - - chunks = chunk_road_links(df, settings.size) - else: - chunks = None - settings.comm.Barrier() - df = settings.comm.scatter(chunks, root=0) - df = df.to_crs({'init': 'epsg:4326'}) - - self.crs = df.crs - - # Correcting percentages - df['PcMoto'] = df['PcMoto'] / 100 - df['PcHeavy'] = df['PcHeavy'] / 100 - df['PcMoped'] = df['PcMoped'] / 100 - df['PcLight'] = 1 - (df['PcMoto'] + df['PcHeavy'] + df['PcMoped']) - - # Road_type int to string - df['Road_type'] = df['Road_type'].astype(str) - df.loc[df['Road_type'] == '0', 'Road_type'] = 'Highway' - df.loc[df['Road_type'] == '1', 'Road_type'] = 'Rural' - df.loc[df['Road_type'] == '2', 'Road_type'] = 'Urban Off Peak' - df.loc[df['Road_type'] == '3', 'Road_type'] = 'Urban Peak' - - # TODO Read with units types - df['road_grad'] = df['road_grad'].astype(float) - - # Check if percents are ok - if len(df[df['PcLight'] < 0]) is not 0: - print 'ERROR: PcLight < 0' - exit(1) - - if self.output_type == 'R-LINE': - self.write_rline_roadlinks(df) - - if settings.log_level_3: - print 'TIME -> Traffic.read_road_links: {0} s'.format(round(gettime() - st_time, 2)) - - return df - - def read_ef(self, emission_type, pollutant_name): - """ - Reads the file that contains the necessary emission factor for the current pollutant and emission type. - - Depending on the emission tyme the file contain different columns. - - :param emission_type: Type of the emission. It can be hot, cold, tyre, road, brake or resuspension. - :type emission_type: str - - :param pollutant_name: Name of the pollutant to read their emission factor. - :type pollutant_name:str - - :return: Returns the readed emission factor in DataFrame mode. - :rtype: Pandas.DataFrame - """ - ef_path = os.path.join(self.ef_common_path, '{0}_{1}.csv'.format(emission_type, pollutant_name)) - df = pd.read_csv(ef_path, sep=';') - - # Pollutants different to NH3 - if pollutant_name != 'nh3': - - del df['Copert_V_name'] - - # For hot emission factors - if emission_type == 'hot': - df = df[(df['Load'] == self.load) | (df['Load'].isnull())] - - df.loc[df['Technology'].isnull(), 'Technology'] = '' - df = df[df['Technology'] != 'EGR'] - - del df['Technology'], df['Load'] - - # Split the EF file into small DataFrames divided by column Road.Slope and Mode restrictions. 
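# (The four frames below cover the possible key granularities of the hot-EF table:
#  rows restricted by both Road.Slope and Mode, by Road.Slope only, by Mode only,
#  or by neither; the length check that follows verifies that the four pieces
#  together account for every row of the original table.)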
- df_code_slope_road = df[df['Road.Slope'].notnull() & df['Mode'].notnull()] - df_code_slope = df[df['Road.Slope'].notnull() & (df['Mode'].isnull())] - df_code_road = df[df['Road.Slope'].isnull() & (df['Mode'].notnull())] - df_code = df[df['Road.Slope'].isnull() & (df['Mode'].isnull())] - - # Checks that the splited DataFrames contain the full DataFrame - if (len(df_code_slope_road) + len(df_code_slope) + len(df_code_road) + len(df_code)) != len(df): - # TODO check that error - print 'ERROR in blablavbla' - - return df_code_slope_road, df_code_slope, df_code_road, df_code - elif emission_type == 'cold' or emission_type == 'tyre' or emission_type == 'road' or \ - emission_type == 'brake' or emission_type == 'resuspension': - return df - # NH3 pollutant - else: - del df['Copert_V_name'] - # Specific case for cold NH3 emission factors that needs the hot emission factors and the cold ones. - if emission_type == 'cold': - df_hot = self.read_ef('hot', pollutant_name) - df_hot.columns = [x + '_hot' for x in df_hot.columns.values] - - df = df.merge(df_hot, left_on=['CODE_HERMESv3', 'Mode'], right_on=['CODE_HERMESv3_hot', 'Mode_hot'], how='left') - - del df['Cmileage_hot'], df['Mode_hot'], df['CODE_HERMESv3_hot'] - - return df - return None - - def read_Mcorr_file(self, pollutant_name): - try: - df_path = os.path.join(self.ef_common_path, 'mcorr_{0}.csv'.format(pollutant_name)) - # print df_path - df = pd.read_csv(df_path, sep=';') - del df['Copert_V_name'] - except: - warnings.warn('No mileage correction applied to {0}'.format(pollutant_name)) - # print 'WARNING: No mileage correction applied to {0}'.format(pollutant_name) - return None - return df - - @staticmethod - def read_temperature(lon_min, lon_max, lat_min, lat_max, temp_dir, date, tstep_num, tstep_freq): - """ - Reads the temperature from the ERA5 tas value. - It will return only the involved cells of the NetCDF in DataFrame format. - - To clip the global NetCDF to the desired region it is needed the minimum and maximum value of the latitudes and - longitudes of the centroids of all the road links. - - :param lon_min: Minimum longitude of the centroid of the road links. - :type lon_min: float - - :param lon_max: Maximum longitude of the centroid of the road links. - :type lon_max: float - - :param lat_min: Minimum latitude of the centroid of the road links. - :type lat_min: float - - :param lat_max: Maximum latitude of the centroid of the road links. - :type lat_max: float - - :return: Temperature, centroid of the cell and cell identificator (REC). - Each time step is each column with the name t_. - :rtype: geopandas.GeoDataFrame - """ - from netCDF4 import Dataset - import cf_units - from shapely.geometry import Point - - path = os.path.join(temp_dir, 'tas_{0}{1}.nc'.format(date.year, str(date.month).zfill(2))) - print 'Getting temperature from {0}'.format(path) - - nc = Dataset(path, mode='r') - lat_o = nc.variables['latitude'][:] - lon_o = nc.variables['longitude'][:] - time = nc.variables['time'] - # From time array to list of dates. - time_array = cf_units.num2date(time[:], time.units, cf_units.CALENDAR_STANDARD) - i_time = np.where(time_array == date)[0][0] - - # Correction to set the longitudes from -180 to 180 instead of from 0 to 360. - if lon_o.max() > 180: - lon_o[lon_o > 180] -= 360 - - # Finds the array positions for the clip. 
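# (Worked example, assuming a monotonically increasing longitude axis: with
#  lon_o = [-10., -9., ..., 10.] and a road-link bounding box of lon_min = -3.2,
#  lon_max = 4.7, the helper returns i_min = 6 and i_max = 16, so that
#  lon_o[i_min:i_max] runs from -4 to 5 degrees and still encloses the box;
#  the latitude indices are obtained analogously.)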
- i_min, i_max, j_min, j_max = Traffic.find_temperature_index(lon_o, lat_o, lon_min, lon_max, lat_min, lat_max) - - # Clips the lat lons - lon_o = lon_o[i_min:i_max] - lat_o = lat_o[j_min:j_max] - - # From 1D to 2D - lat = np.array([lat_o[:]] * len(lon_o[:])).T.flatten() - lon = np.array([lon_o[:]] * len(lat_o[:])).flatten() - del lat_o, lon_o - - # Reads the tas variable of the xone and the times needed. - # tas = nc.variables['tas'][i_time:i_time + (self.timestep_num*self.timestep_freq): self.timestep_freq, i_min:i_max, j_min:j_max] - tas = nc.variables['tas'][i_time:i_time + (tstep_num*tstep_freq): tstep_freq, j_min:j_max, i_min:i_max] - - nc.close() - # That condition is fot the cases that the needed temperature is in a different NetCDF. - while len(tas) < tstep_num: - path = os.path.join(temp_dir, 'tas_{0}{1}.nc'.format(date.year, str(date.month +1).zfill(2))) - print 'Getting temperature from {0}'.format(path) - nc = Dataset(path, mode='r') - # TODO timestep_freq != 1 - i_time = 0 - new_tas = nc.variables['tas'][i_time:i_time + ((tstep_num - len(tas))*tstep_freq): tstep_freq, j_min:j_max, i_min:i_max] - - tas = np.concatenate([tas, new_tas]) - - nc.close() - - # From Kelvin to Celsius degrees - tas = (tas - 273.15).reshape((tas.shape[0], tas.shape[1] * tas.shape[2])) - - # Creates the GeoDataFrame - df = gpd.GeoDataFrame(tas.T, geometry=[Point(xy) for xy in zip(lon, lat)]) - df.columns = ['t_{0}'.format(x) for x in df.columns.values[:-1]] + ['geometry'] - df.loc[:, 'REC'] = df.index - - return df - - @ staticmethod - def find_temperature_index(lon, lat, lon_min, lon_max, lat_min, lat_max): - # print lon, lat, lon_min, lon_max, lat_min, lat_max - - aux = lon - lon_min - aux[aux > 0] = np.nan - i_min = np.where(aux == np.nanmax(aux))[0][0] - - aux = lon - lon_max - - aux[aux < 0] = np.nan - - i_max = np.where(aux == np.nanmin(aux))[0][0] - - aux = lat - lat_min - aux[aux > 0] = np.nan - j_max = np.where(aux == np.nanmax(aux))[0][0] - - aux = lat - lat_max - aux[aux < 0] = np.nan - j_min = np.where(aux == np.nanmin(aux))[0][0] - - return i_min, i_max+1, j_min, j_max+1 - - @staticmethod - def update_fleet_value(df): - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - # Calculating fleet value by fleet class - df.loc[:, 'Fleet_value'] = df['Fleet_value'] * df['aadt'] - - df.loc[df['Fleet_Class'] == 'light_veh', 'Fleet_value'] = df['PcLight'] * df['Fleet_value'] - df.loc[df['Fleet_Class'] == 'heavy_veh', 'Fleet_value'] = df['PcHeavy'] * df['Fleet_value'] - df.loc[df['Fleet_Class'] == 'motos', 'Fleet_value'] = df['PcMoto'] * df['Fleet_value'] - df.loc[df['Fleet_Class'] == 'mopeds', 'Fleet_value'] = df['PcMoped'] * df['Fleet_value'] - - for link_id, aux_df in df.groupby('Link_ID'): - aadt = round(aux_df['aadt'].min(), 1) - fleet_value = round(aux_df['Fleet_value'].sum(), 1) - if aadt != fleet_value: - print 'link_ID: {0} aadt: {1} sum_fleet: {2}'.format(link_id, aadt, fleet_value) - - # Drop 0 values - df = df[df['Fleet_value'] > 0] - - # Deleting unused columns - del df['aadt'], df['PcLight'], df['PcHeavy'], df['PcMoto'], df['PcMoped'], df['Fleet_Class'] - if settings.log_level_3: - print 'TIME -> Traffic.update_fleet_value: {0} s'.format(round(gettime() - st_time, 2)) - return df - - @staticmethod - def calculate_timedelta(timestep_type, num_tstep, timestep_freq): - from datetime import timedelta - - if timestep_type == 'hourly': - delta = timedelta(hours=timestep_freq * num_tstep) - else: - print 'ERROR: only hourly emission permited' - sys.exit(1) - return 
pd.Timedelta(delta) - - def calculate_hourly_speed(self, df): - - # speed_aux = pd.DataFrame(self.speed_hourly.loc[self.speed_hourly['PROFILE_ID'].isin(np.unique(df['profile_id'].values))]) - - df = df.merge(self.speed_hourly, left_on='profile_id', right_on='PROFILE_ID', how='left') - df['speed'] = df.groupby('hour').apply(lambda x: x[[str(x.name)]]) - - # df.loc[df['profile_id'] != 1, 'speed'] = df.groupby('hour').apply(lambda x: x[[str(x.name)]]) - # df.loc[df['profile_id'] != 1, 'speed'] = df['speed'] * df['speed_mean'] - # df.loc[df['profile_id'] == 1, 'speed'] = df['speed_mean'] - # # df.reset_index() - return df['speed'] * df['speed_mean'] - - def calculate_temporal_factor(self, df): - import calendar - - def get_hourly_id_from_weekday(weekday): - if weekday <= 4: - return 'aadt_h_wd' - elif weekday == 5: - return 'aadt_h_sat' - elif weekday == 6: - return 'aadt_h_sun' - else: - print 'ERROR: Weekday not found' - exit() - - # Monthly factor - df = df.merge(self.monthly_profiles, left_on='aadt_m_mn', right_on='PROFILE_ID', how='left') - df['FM'] = df.groupby('month').apply(lambda x: x[[calendar.month_abbr[x.name].upper()]]) - # del df['JAN'], df['FEB'], df['MAR'], df['APR'], df['MAY'], df['JUN'] - # del df['JUL'], df['AUG'], df['SEP'], df['OCT'], df['NOV'], df['DEC'] - # del df['month'], df['PROFILE_ID'], df['aadt_m_mn'] - - # print df - - # Daily factor - df = df.merge(self.daily_profiles, left_on='aadt_week', right_on='PROFILE_ID', how='left') - df['FD'] = df.groupby('week_day').apply(lambda x: x[[calendar.day_name[x.name].upper()]]) - # del df['MONDAY'], df['TUESDAY'], df['WEDNESDAY'], df['THURSDAY'], df['FRIDAY'] - # del df['SATURDAY'], df['SUNDAY'] - # del df['PROFILE_ID'], df['aadt_week'] - - # print df - - # Hourly factor - # print self.hourly_profiles - df['hourly_profile'] = df.groupby('week_day').apply(lambda x: x[[get_hourly_id_from_weekday(x.name)]]) - df.loc[df['hourly_profile'] == '', 'hourly_profile'] = df['aadt_h_mn'] - - df['hourly_profile'] = df['hourly_profile'].astype(str) - self.hourly_profiles['PROFILE_ID'] = self.hourly_profiles['PROFILE_ID'].astype(str) - - df = df.merge(self.hourly_profiles, left_on='hourly_profile', right_on='PROFILE_ID', how='left') - df['FH'] = df.groupby('hour').apply(lambda x: x[[str(x.name)]]) - - return df['FM'] * df['FD'] * df['FH'] - - def calculate_time_dependent_values(self, df, timestep_type, timestep_num, timestep_freq): - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - df.reset_index(inplace=True) - for tstep in xrange(timestep_num): - # Finding weekday - # 0 -> Monday; 6 -> Sunday - df.loc[:, 'month'] = (df['start_date'] + self.calculate_timedelta(timestep_type, tstep, timestep_freq)).dt.month - df.loc[:, 'week_day'] = (df['start_date'] + self.calculate_timedelta(timestep_type, tstep, timestep_freq)).dt.weekday - df.loc[:, 'hour'] = (df['start_date'] + self.calculate_timedelta(timestep_type, tstep, timestep_freq)).dt.hour - - # Selecting speed_mean - df.loc[df['week_day'] <= 4, 'speed_mean'] = df['sp_wd'] - df.loc[df['week_day'] > 4, 'speed_mean'] = df['sp_we'] - - # Selecting speed profile_id - df.loc[df['week_day'] == 0, 'profile_id'] = df['sp_hour_mo'] - df.loc[df['week_day'] == 1, 'profile_id'] = df['sp_hour_tu'] - df.loc[df['week_day'] == 2, 'profile_id'] = df['sp_hour_we'] - df.loc[df['week_day'] == 3, 'profile_id'] = df['sp_hour_th'] - df.loc[df['week_day'] == 4, 'profile_id'] = df['sp_hour_fr'] - df.loc[df['week_day'] == 5, 'profile_id'] = df['sp_hour_sa'] - df.loc[df['week_day'] == 6, 
'profile_id'] = df['sp_hour_su'] - - # Selecting flat profile for 0 and nan's - df.loc[df['profile_id'] == 0, 'profile_id'] = 1 - df.loc[df['profile_id'] == np.nan, 'profile_id'] = 1 - - # Calculating speed by tstep - speed_column_name = 'v_{0}'.format(tstep) - df[speed_column_name] = self.calculate_hourly_speed(df.loc[:, ['hour', 'speed_mean', 'profile_id']]) - - factor_column_name = 'f_{0}'.format(tstep) - - df.loc[:, factor_column_name] = self.calculate_temporal_factor( - df.loc[:, ['month', 'week_day', 'hour', 'aadt_m_mn', 'aadt_week', 'aadt_h_mn', 'aadt_h_wd', 'aadt_h_sat', 'aadt_h_sun']]) - - # Deleting time variables - - del df['month'], df['week_day'], df['hour'], df['profile_id'], df['speed_mean'] - del df['sp_wd'], df['sp_we'], df['index'] - del df['sp_hour_mo'], df['sp_hour_tu'], df['sp_hour_we'], df['sp_hour_th'], df['sp_hour_fr'] - del df['sp_hour_sa'], df['sp_hour_su'] - del df['aadt_m_mn'], df['aadt_h_mn'], df['aadt_h_wd'], df['aadt_h_sat'], df['aadt_h_sun'], df['aadt_week'] - del df['start_date'] - - if settings.log_level_3: - print 'TIME -> Traffic.calculate_time_dependent_values: {0} s'.format(round(gettime() - st_time, 2)) - - return df - - def expand_road_links(self, timestep_type, timestep_num, timestep_freq): - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - # Expands each road link by any vehicle type that the selected road link has. - df_list = [] - road_link_aux = self.road_links.copy() - del road_link_aux['geometry'] - for zone, compo_df in road_link_aux.groupby('fleet_comp'): - fleet = self.find_fleet(zone) - df_aux = pd.merge(compo_df, fleet, how='left', on='fleet_comp') - df_list.append(df_aux) - - df = pd.concat(df_list, ignore_index=True) - - del df['fleet_comp'] - - # df.to_csv('/home/Earth/ctena/Models/HERMESv3/OUT/2_pre_expanded.csv') - df = self.update_fleet_value(df) - df = self.calculate_time_dependent_values(df, timestep_type, timestep_num, timestep_freq) - - if settings.log_level_3: - print 'TIME -> Traffic.expand_road_links: {0} s'.format(round(gettime() - st_time, 2)) - return df - - def find_fleet(self, zone): - - # print self.fleet_compo - try: - fleet = self.fleet_compo[['Code', 'Class', zone]] - except KeyError as e: - raise KeyError(e.message + ' of the fleet_compo file') - fleet.columns = ['Fleet_Code', 'Fleet_Class', 'Fleet_value'] - - fleet = fleet[fleet['Fleet_value'] > 0] - - fleet['fleet_comp'] = zone - - return fleet - - def calculate_hot(self): - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - expanded_aux = self.expanded.copy().reset_index() - - for pollutant in self.pollutant_list: - if pollutant != 'nh3': - - ef_code_slope_road, ef_code_slope, ef_code_road, ef_code = self.read_ef('hot', pollutant) - - df_code_slope_road = expanded_aux.merge(ef_code_slope_road, left_on=['Fleet_Code', 'road_grad', 'Road_type'], right_on=['CODE_HERMESv3', 'Road.Slope', 'Mode'], how='inner') - df_code_slope = expanded_aux.merge(ef_code_slope, left_on=['Fleet_Code', 'road_grad'], right_on=['CODE_HERMESv3', 'Road.Slope'], how='inner') - df_code_road = expanded_aux.merge(ef_code_road, left_on=['Fleet_Code', 'Road_type'], right_on=['CODE_HERMESv3', 'Mode'], how='inner') - df_code = expanded_aux.merge(ef_code, left_on=['Fleet_Code'], right_on=['CODE_HERMESv3'], how='inner') - - del ef_code_slope_road, ef_code_slope, ef_code_road, ef_code - - expanded_aux = pd.concat([df_code_slope_road, df_code_slope, df_code_road, df_code])#.set_index('index') - - del expanded_aux['CODE_HERMESv3'], 
expanded_aux['Road.Slope'], expanded_aux['Mode'] - try: - del expanded_aux['index'] - except: - pass - else: - ef_code_road = self.read_ef('hot', pollutant) - expanded_aux = expanded_aux.merge(ef_code_road, left_on=['Fleet_Code', 'Road_type'], right_on=['CODE_HERMESv3', 'Mode'], how='inner') - - del expanded_aux['CODE_HERMESv3'], expanded_aux['Mode'] - - # Warnings and Errors - original_ef_profile = self.expanded['Fleet_Code'].unique() - calculated_ef_profiles = expanded_aux['Fleet_Code'].unique() - resta_1 = [item for item in original_ef_profile if item not in calculated_ef_profiles] # Warining - resta_2 = [item for item in calculated_ef_profiles if item not in original_ef_profile] # Error - - if len(resta_1) > 0: - warnings.warn('Exists some fleet codes that not appear on the EF file: {0}'.format(resta_1), Warning) - if len(resta_2) > 0: - raise ImportError('Exists some fleet codes duplicateds on the EF file: {0}'.format(resta_2)) - - m_corr = self.read_Mcorr_file(pollutant) - if m_corr is not None: - expanded_aux = expanded_aux.merge(m_corr, left_on='Fleet_Code', right_on='CODE_HERMESv3', how='left') - del expanded_aux['CODE_HERMESv3'] - # print expanded_aux - for tstep in xrange(self.timestep_num): - ef_name = 'ef_{0}_{1}'.format(pollutant, tstep) - p_column = '{0}_{1}'.format(pollutant, tstep) - if pollutant != 'nh3': - expanded_aux['v_aux'] = expanded_aux['v_{0}'.format(tstep)] - # print tstep, expanded_aux.loc[:, ['v_aux', 'Min.Speed']] - # print len(expanded_aux.loc[expanded_aux['v_aux'] < expanded_aux['Min.Speed'], 'v_aux']) - # print expanded_aux - # print expanded_aux.loc[expanded_aux['v_aux'] < expanded_aux['Min.Speed'], 'v_aux'] - # print expanded_aux.loc[expanded_aux['v_aux'] < expanded_aux['Min.Speed'], 'Min.Speed'].index - expanded_aux.loc[expanded_aux['v_aux'] < expanded_aux['Min.Speed'], 'v_aux'] = expanded_aux.loc[expanded_aux['v_aux'] < expanded_aux['Min.Speed'], 'Min.Speed'] - expanded_aux.loc[expanded_aux['v_aux'] > expanded_aux['Max.Speed'], 'v_aux'] = expanded_aux.loc[expanded_aux['v_aux'] > expanded_aux['Max.Speed'], 'Max.Speed'] - - # EF - expanded_aux.loc[:, ef_name] = ((expanded_aux.Alpha * expanded_aux.v_aux**2 + expanded_aux.Beta*expanded_aux.v_aux + expanded_aux.Gamma + (expanded_aux.Delta/expanded_aux.v_aux))/(expanded_aux.Epsilon*expanded_aux.v_aux**2 + expanded_aux.Zita*expanded_aux.v_aux + expanded_aux.Hta))*(1 - expanded_aux.RF)*(expanded_aux.PF*expanded_aux['T']/expanded_aux.Q) - else: - expanded_aux.loc[:, ef_name] = ((expanded_aux['a'] * expanded_aux['Cmileage'] + expanded_aux['b'])*(expanded_aux['EFbase'] * expanded_aux['TF']))/1000 - - - # Mcorr - # m_corr = self.read_Mcorr_file(pollutant) - if m_corr is not None: - # expanded_aux = expanded_aux.merge(m_corr) - - expanded_aux.loc[expanded_aux['v_aux'] <= 19., 'Mcorr'] = expanded_aux.A_urban*expanded_aux['M'] + expanded_aux.B_urban - expanded_aux.loc[expanded_aux['v_aux'] >= 63., 'Mcorr'] = expanded_aux.A_road * expanded_aux['M'] + expanded_aux.B_road - expanded_aux.loc[(expanded_aux['v_aux'] > 19.) & (expanded_aux['v_aux'] < 63.), 'Mcorr'] = (expanded_aux.A_urban*expanded_aux['M'] + expanded_aux.B_urban) +((expanded_aux.v_aux - 19)*((expanded_aux.A_road * expanded_aux['M'] + expanded_aux.B_road) - (expanded_aux.A_urban*expanded_aux['M'] + expanded_aux.B_urban)))/44. 
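The mileage correction applied above is a piecewise blend of an urban and a road regression on the vehicle mileage M, switching linearly over the 19-63 km/h speed range. Written out as a standalone function (argument names are illustrative and stand for the A_urban/B_urban/A_road/B_road and M columns of the removed code):

def mileage_correction(v, m, a_urban, b_urban, a_road, b_road):
    # Urban fit below 19 km/h, road fit above 63 km/h, and a linear blend
    # over the 44 km/h span in between.
    urban = a_urban * m + b_urban
    road = a_road * m + b_road
    if v <= 19.0:
        return urban
    if v >= 63.0:
        return road
    return urban + (v - 19.0) * (road - urban) / 44.0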
- expanded_aux.loc[expanded_aux['Mcorr'].isnull(), 'Mcorr'] = 1 - else: - expanded_aux.loc[:, 'Mcorr'] = 1 - - # Full formula - expanded_aux.loc[:, p_column] = expanded_aux['Fleet_value'] * expanded_aux[ef_name] * expanded_aux['Mcorr'] * expanded_aux['f_{0}'.format(tstep)] - # expanded_aux.to_csv('/home/Earth/ctena/Models/HERMESv3/OUT/hot_expanded_{0}_{1}.csv'.format(pollutant,tstep)) - del expanded_aux[ef_name], expanded_aux['Mcorr'] - - if pollutant != 'nh3': - del expanded_aux['v_aux'] - del expanded_aux['Min.Speed'], expanded_aux['Max.Speed'], expanded_aux['Alpha'], expanded_aux['Beta'] - del expanded_aux['Gamma'], expanded_aux['Delta'], expanded_aux['Epsilon'], expanded_aux['Zita'] - del expanded_aux['Hta'], expanded_aux['RF'], expanded_aux['Q'], expanded_aux['PF'], expanded_aux['T'] - else: - del expanded_aux['a'], expanded_aux['Cmileage'], expanded_aux['b'], expanded_aux['EFbase'], expanded_aux['TF'] - - if m_corr is not None: - del expanded_aux['A_urban'], expanded_aux['B_urban'], expanded_aux['A_road'], expanded_aux['B_road'], expanded_aux['M'] - - # del expanded_aux['Fleet_value'], expanded_aux[ef_name], expanded_aux['Mcorr'], expanded_aux['f_{0}'.format(tstep)] - - del expanded_aux['road_grad'] - - for tstep in xrange(self.timestep_num): - del expanded_aux['f_{0}'.format(tstep)] - - if settings.log_level_3: - print 'TIME -> Traffic.calculate_hot: {0} s'.format(round(gettime() - st_time, 2)) - return expanded_aux - - def calculate_cold(self, hot_expanded): - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - cold_links = self.road_links.copy() - - del cold_links['aadt'], cold_links['PcHeavy'], cold_links['PcMoto'], cold_links['PcMoped'], cold_links['sp_wd'] - del cold_links['sp_we'], cold_links['sp_hour_su'], cold_links['sp_hour_mo'], cold_links['sp_hour_tu'] - del cold_links['sp_hour_we'], cold_links['sp_hour_th'], cold_links['sp_hour_fr'], cold_links['sp_hour_sa'] - del cold_links['Road_type'], cold_links['aadt_m_mn'], cold_links['aadt_h_mn'], cold_links['aadt_h_wd'] - del cold_links['aadt_h_sat'], cold_links['aadt_h_sun'], cold_links['aadt_week'], cold_links['fleet_comp'] - del cold_links['road_grad'], cold_links['PcLight'], cold_links['start_date'] - - cold_links.loc[:, 'centroid'] = cold_links['geometry'].centroid - link_lons = cold_links['geometry'].centroid.x - link_lats = cold_links['geometry'].centroid.y - - temperature = self.read_temperature(link_lons.min(), link_lons.max(), link_lats.min(), link_lats.max(), self.temp_common_path, self.starting_date, self.timestep_num, self.timestep_freq) - - print 'Nearest time ...', - st_time = gettime() - unary_union = temperature.unary_union - cold_links['REC'] = cold_links.apply(self.nearest, geom_union=unary_union, df1=cold_links, df2=temperature, - geom1_col='centroid', src_column='REC', axis=1) - del cold_links['geometry'], cold_links['centroid'], temperature['geometry'] - - cold_links = cold_links.merge(temperature, left_on='REC', right_on='REC', how='left') - - del cold_links['REC'] - - print ' {0} s'.format(round(gettime() - st_time, 2)) - - c_expanded = hot_expanded.merge(cold_links, left_on='Link_ID', right_on='Link_ID', how='left') - - # cold_df = c_expanded.loc[:, ['Link_ID', 'Fleet_Code']] - # cold_df.set_index('Link_ID', inplace=True) - df_list = [] - for pollutant in self.pollutant_list: - - ef_cold = self.read_ef('cold', pollutant) - - if pollutant != 'nh3': - ef_cold.loc[ef_cold['Tmin'].isnull(), 'Tmin'] = -999 - ef_cold.loc[ef_cold['Tmax'].isnull(), 'Tmax'] = 999 - 
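In calculate_cold above, each link centroid is matched to its nearest temperature cell with shapely's nearest_points against the union of all cell points, following the recipe referenced in the removed nearest() helper further down. A trimmed sketch of that lookup:

import geopandas as gpd
from shapely.geometry import Point
from shapely.ops import nearest_points

def nearest_value(point, candidates, src_column):
    # candidates: GeoDataFrame of Point geometries; returns the value in
    # src_column of the row whose geometry is closest to `point`.
    closest = nearest_points(point, candidates.unary_union)[1]
    return candidates.loc[candidates.geometry == closest, src_column].iloc[0]

cells = gpd.GeoDataFrame({'REC': [0, 1]},
                         geometry=[Point(2.1, 41.3), Point(2.4, 41.3)])
nearest_value(Point(2.15, 41.35), cells, 'REC')   # -> 0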
ef_cold.loc[ef_cold['Min.Speed'].isnull(), 'Min.Speed'] = -999 - ef_cold.loc[ef_cold['Max.Speed'].isnull(), 'Max.Speed'] = 999 - - c_expanded_p = c_expanded.merge(ef_cold, left_on=['Fleet_Code', 'Road_type'], - right_on=['CODE_HERMESv3', 'Mode'], how='inner') - cold_exp_p_aux = c_expanded_p.copy() - - del cold_exp_p_aux['index_right_x'], cold_exp_p_aux['Road_type'], cold_exp_p_aux['Fleet_value'] - del cold_exp_p_aux['CODE_HERMESv3'] - # df_list_aux = [] - for tstep in xrange(self.timestep_num): - v_column = 'v_{0}'.format(tstep) - p_column = '{0}_{1}'.format(pollutant, tstep) - t_column = 't_{0}'.format(tstep) - if pollutant != 'nh3': - cold_exp_p_aux = cold_exp_p_aux.loc[cold_exp_p_aux[t_column] >= cold_exp_p_aux['Tmin'], :] - cold_exp_p_aux = cold_exp_p_aux.loc[cold_exp_p_aux[t_column] < cold_exp_p_aux['Tmax'], :] - cold_exp_p_aux = cold_exp_p_aux.loc[cold_exp_p_aux[v_column] >= cold_exp_p_aux['Min.Speed'], :] - cold_exp_p_aux = cold_exp_p_aux.loc[cold_exp_p_aux[v_column] < cold_exp_p_aux['Max.Speed'], :] - - # Beta - cold_exp_p_aux.loc[:, 'Beta'] = (0.6474 - 0.02545*cold_exp_p_aux['ltrip'] - (0.00974 - 0.000385*cold_exp_p_aux['ltrip'])*cold_exp_p_aux[t_column])*cold_exp_p_aux['bc'] - if pollutant != 'nh3': - cold_exp_p_aux.loc[:, 'cold_hot'] = cold_exp_p_aux['A'] * cold_exp_p_aux[v_column] + cold_exp_p_aux['B'] * cold_exp_p_aux[t_column] + cold_exp_p_aux['C'] - - else: - cold_exp_p_aux.loc[:, 'cold_hot'] = ((cold_exp_p_aux['a'] * cold_exp_p_aux['Cmileage'] + cold_exp_p_aux['b']) * cold_exp_p_aux['EFbase'] * cold_exp_p_aux['TF'])/((cold_exp_p_aux['a_hot'] * cold_exp_p_aux['Cmileage'] + cold_exp_p_aux['b_hot']) * cold_exp_p_aux['EFbase_hot'] * cold_exp_p_aux['TF_hot']) - cold_exp_p_aux.loc[cold_exp_p_aux['cold_hot'] < 1, 'cold_hot'] = 1 - - # Formula Cold emissions - cold_exp_p_aux.loc[:, p_column] = cold_exp_p_aux[p_column] * cold_exp_p_aux['Beta'] * (cold_exp_p_aux['cold_hot'] - 1) - # print pollutant - df_list.append((cold_exp_p_aux.loc[:, ['Link_ID', 'Fleet_Code', p_column]]).set_index(['Link_ID', 'Fleet_Code'])) - - try: - cold_df = pd.concat(df_list, axis=1, ).reset_index() - except Exception: - error_fleet_code = [] - for df in df_list: - orig = list(df.index.values) - uni = list(np.unique(df.index.values)) - - for o in orig: - try: - uni.remove(o) - except: - error_fleet_code.append(o) - raise IndexError('There are duplicated values for {0} codes in the cold EF files.'.format(error_fleet_code)) - - for tstep in xrange(self.timestep_num): - if 'pm' in self.pollutant_list: - cold_df.loc[:, 'pm10_{0}'.format(tstep)] = cold_df['pm_{0}'.format(tstep)] - cold_df.loc[:, 'pm25_{0}'.format(tstep)] = cold_df['pm_{0}'.format(tstep)] - del cold_df['pm_{0}'.format(tstep)] - - if 'voc' in self.pollutant_list and 'ch4' in self.pollutant_list: - cold_df.loc[:, 'nmvoc_{0}'.format(tstep)] = cold_df['voc_{0}'.format(tstep)] - cold_df['ch4_{0}'.format(tstep)] - del cold_df['voc_{0}'.format(tstep)], cold_df['ch4_{0}'.format(tstep)] - else: - warnings.warn("nmvoc emissions cannot be estimated because voc or ch4 are not selected in the pollutant list.") - - cold_df = self.speciate_traffic(cold_df, self.hot_cold_speciation) - - # del cold_df['Fleet_Code'] - # - # cold_df = cold_df.groupby(['tstep', 'Link_ID']).sum() - - if settings.log_level_3: - print 'TIME -> Traffic.calculate_cold: {0} s'.format(round(gettime() - st_time, 2)) - - return cold_df - - def compact_hot_expanded(self, expanded): - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - columns_to_delete = ['Road_type', 
'Fleet_value'] + ['v_{0}'.format(x) for x in xrange(self.timestep_num)] - for column_name in columns_to_delete: - del expanded[column_name] - - for tstep in xrange(self.timestep_num): - if 'pm' in self.pollutant_list: - expanded.loc[:, 'pm10_{0}'.format(tstep)] = expanded['pm_{0}'.format(tstep)] - expanded.loc[:, 'pm25_{0}'.format(tstep)] = expanded['pm_{0}'.format(tstep)] - del expanded['pm_{0}'.format(tstep)] - - if 'voc' in self.pollutant_list and 'ch4' in self.pollutant_list: - expanded.loc[:, 'nmvoc_{0}'.format(tstep)] = expanded['voc_{0}'.format(tstep)] - expanded['ch4_{0}'.format(tstep)] - del expanded['voc_{0}'.format(tstep)], expanded['ch4_{0}'.format(tstep)] - else: - warnings.warn( - "nmvoc emissions cannot be estimated because voc or ch4 are not selected in the pollutant list.") - - #expanded = self.speciate_traffic_old(expanded, self.hot_cold_speciation) - compacted = self.speciate_traffic(expanded, self.hot_cold_speciation) - - # del expanded['Fleet_Code'] - # - # df = expanded.groupby(['tstep', 'Link_ID']).sum() - - if settings.log_level_3: - print 'TIME -> Traffic.compact_hot_expanded: {0} s'.format(round(gettime() - st_time, 2)) - - return compacted - - def calculate_tyre_wear(self): - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - pollutants = ['pm'] - for pollutant in pollutants: - ef_tyre = self.read_ef('tyre', pollutant) - df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='CODE_HERMESv3', how='inner') - del df['road_grad'], df['Road_type'], df['CODE_HERMESv3'] - for tstep in xrange(self.timestep_num): - p_column = '{0}_{1}'.format(pollutant, tstep) - f_column = 'f_{0}'.format(tstep) - v_column = 'v_{0}'.format(tstep) - df.loc[df[v_column] < 40, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] * 1.39 - df.loc[(df[v_column] >= 40) & (df[v_column] <= 90), p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] * (-0.00974* df[v_column]+1.78) - df.loc[df[v_column] > 90, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] * 0.902 - - # from PM to PM10 & PM2.5 - if pollutant == 'pm': - df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] * 0.6 - df.loc[:, 'pm25_{0}'.format(tstep)] = df[p_column] * 0.42 - del df[p_column] - - # Cleaning df - columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in xrange(self.timestep_num)] - columns_to_delete += ['Fleet_value', 'EFbase'] - for column in columns_to_delete: - del df[column] - - df = self.speciate_traffic(df, self.tyre_speciation) - - #del df['Fleet_Code'] - - if settings.log_level_3: - print 'TIME -> Traffic.calculate_tyre_wear: {0} s'.format(round(gettime() - st_time, 2)) - - return df #.groupby(['tstep', 'Link_ID']).sum() - - def calculate_brake_wear(self): - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - pollutants = ['pm'] - for pollutant in pollutants: - ef_tyre = self.read_ef('brake', pollutant) - df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='CODE_HERMESv3', how='inner') - del df['road_grad'], df['Road_type'], df['CODE_HERMESv3'] - for tstep in xrange(self.timestep_num): - p_column = '{0}_{1}'.format(pollutant, tstep) - f_column = 'f_{0}'.format(tstep) - v_column = 'v_{0}'.format(tstep) - df.loc[df[v_column] < 40, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] * 1.67 - df.loc[(df[v_column] >= 40) & (df[v_column] <= 95), p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] * (-0.027 * df[v_column] + 2.75) - df.loc[df[v_column] > 95, p_column] = 
df['Fleet_value'] * df['EFbase'] * df[f_column] * 0.185 - - # from PM to PM10 & PM2.5 - if pollutant == 'pm': - df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] * 0.98 - df.loc[:, 'pm25_{0}'.format(tstep)] = df[p_column] * 0.39 - del df[p_column] - - # Cleaning df - columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in xrange(self.timestep_num)] - columns_to_delete += ['Fleet_value', 'EFbase'] - for column in columns_to_delete: - del df[column] - - df = self.speciate_traffic(df, self.brake_speciation) - - # del df['Fleet_Code'] - - if settings.log_level_3: - print 'TIME -> Traffic.calculate_brake_wear: {0} s'.format(round(gettime() - st_time, 2)) - - return df #.groupby(['tstep', 'Link_ID']).sum() - - def calculate_road_wear(self): - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - pollutants = ['pm'] - for pollutant in pollutants: - ef_tyre = self.read_ef('road', pollutant) - df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='CODE_HERMESv3', how='inner') - del df['road_grad'], df['Road_type'], df['CODE_HERMESv3'] - for tstep in xrange(self.timestep_num): - p_column = '{0}_{1}'.format(pollutant, tstep) - f_column = 'f_{0}'.format(tstep) - df.loc[:, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] - - # from PM to PM10 & PM2.5 - if pollutant == 'pm': - df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] * 0.5 - df.loc[:, 'pm25_{0}'.format(tstep)] = df[p_column] * 0.27 - del df[p_column] - - # Cleaning df - columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in xrange(self.timestep_num)] - columns_to_delete += ['Fleet_value', 'EFbase'] - for column in columns_to_delete: - del df[column] - - df = self.speciate_traffic(df, self.road_speciation) - - # del df['Fleet_Code'] - - if settings.log_level_3: - print 'TIME -> Traffic.calculate_road_wear: {0} s'.format(round(gettime() - st_time, 2)) - - return df # .groupby(['tstep', 'Link_ID']).sum() - - def calculate_resuspension(self): - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - pollutants = ['pm'] - for pollutant in pollutants: - ef_tyre = self.read_ef('resuspension', pollutant) - df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='CODE_HERMESv3', how='inner') - del df['road_grad'], df['Road_type'], df['CODE_HERMESv3'] - for tstep in xrange(self.timestep_num): - p_column = '{0}_{1}'.format(pollutant, tstep) - f_column = 'f_{0}'.format(tstep) - pr_column = 'PR_{0}'.format(tstep) - # TODO Calculate PR for each tstep - df.loc[:, pr_column] = 1 - df.loc[:, p_column] = df['Fleet_value'] * df['EFbase'] * df[pr_column] * df[f_column] - del df[pr_column] - - # from PM to PM10 & PM2.5 - if pollutant == 'pm': - df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] - # TODO Check fraction of pm2.5 - df.loc[:, 'pm25_{0}'.format(tstep)] = df[p_column] * 0.5 - del df[p_column] - - # Cleaning df - columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in - xrange(self.timestep_num)] - columns_to_delete += ['Fleet_value', 'EFbase'] - for column in columns_to_delete: - del df[column] - - df = self.speciate_traffic(df, self.resuspension_speciation) - - # del df['Fleet_Code'] - - if settings.log_level_3: - print 'TIME -> Traffic.calculate_resuspension: {0} s'.format(round(gettime() - st_time, 2)) - - return df # .groupby(['tstep', 'Link_ID']).sum() - - def transform_df(self, df): - - df_list = [] - - for tstep in 
xrange(self.timestep_num): - pollutants_to_rename = [p for p in list(df.columns.values) if p.endswith('_{0}'.format(tstep))] - pollutants_renamed = [] - for p_name in pollutants_to_rename: - p_name_new = p_name.replace('_{0}'.format(tstep), '') - df.rename(columns={p_name: p_name_new}, inplace=True) - pollutants_renamed.append(p_name_new) - - df_aux = pd.DataFrame(df.loc[:, ['Link_ID', 'Fleet_Code'] + pollutants_renamed]) - df_aux['tstep'] = tstep - - df_list.append(df_aux) - for p_name in pollutants_renamed: - del df[p_name] - - df = pd.concat(df_list, ignore_index=True) - return df - - def speciate_traffic_old(self, df, speciation): - df_map = pd.read_csv(self.speciation_map, sep=';') - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - df = self.transform_df(df) - - speciation = pd.read_csv(speciation, sep=';') - del speciation['Copert_V_name'] - in_p_list = list(df.columns.values) - in_columns = ['Link_ID', 'Fleet_Code', 'tstep'] - for in_col in in_columns: - try: - in_p_list.remove(in_col) - except: - print 'ERROR', in_col - for in_col in in_p_list: - df.rename(columns={in_col: 'old_{0}'.format(in_col)}, inplace=True) - - out_p_list = list(speciation.columns.values) - out_p_list.remove('CODE_HERMESv3') - for p in out_p_list: - - speciation.rename(columns={p: 'f_{0}'.format(p)}, inplace=True) - - df = df.merge(speciation, left_on='Fleet_Code', right_on='CODE_HERMESv3', how='left') - try: - del df['CODE_HERMESv3'] - del df['index_right'] - except: - pass - - # print df_map.columns.values - for p in out_p_list: - if p == 'pmc': - df.loc[:, p] = df['old_pm10'] - df['old_pm25'] - if self.output_type == 'R-LINE': - # from g/km.h to g/m.s - df.loc[:, p] = df.loc[:, p] / (1000 * 3600) - elif self.output_type == 'CMAQ': - # from g/km.h to mol/km.s - df.loc[:, p] = df.loc[:, p] / 3600 - elif self.output_type == 'MONARCH': - # from g/km.h to Kg/km.s - df.loc[:, p] = df.loc[:, p] / (1000 * 3600) - else: - try: - in_p = df_map.loc[df_map['dst'] == p, 'src'].values[0] - except IndexError: - raise ValueError('The pollutant {0} does not appear in the traffic_speciation_map file'.format(p)) - - if in_p is not np.nan: - if in_p != 0: - df.loc[:, p] = df['old_{0}'.format(in_p)].multiply(df['f_{0}'.format(p)]) - try: - mol_w = self.molecular_weigths.loc[self.molecular_weigths['Specie'] == in_p, 'MW'].values[0] - except IndexError: - raise AttributeError('{0} not found in the molecular weights file.'.format(in_p)) - - if self.output_type == 'R-LINE': - # from g/km.h to g/m.s - df.loc[:, p] = df.loc[:, p] / (1000 * 3600) - elif self.output_type == 'CMAQ': - # from g/km.h to mol/km.s or g/km.s (aerosols) - df.loc[:, p] = df.loc[:, p] / (3600 * mol_w) - elif self.output_type == 'MONARCH': - if p.lower() in aerosols: - # from g/km.h to kg/km.s - df.loc[:, p] = df.loc[:, p] / (1000 * 3600 * mol_w) - else: - # from g/km.h to mol/km.s - df.loc[:, p] = df.loc[:, p] / (3600 * mol_w) - - else: - df.loc[:, p] = 0 - - tot_cols = list(df.columns.values) - for col in tot_cols: - if col.startswith('old_') or col.startswith('f_'): - del df[col] - - if settings.log_level_3: - print 'TIME -> Traffic.speciate_traffic: {0} s'.format(round(gettime() - st_time, 2)) - return df - - def speciate_traffic(self, df, speciation): - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - # Reads maps dst to src pollutants - map = pd.read_csv(self.speciation_map, sep=';') - - # Reads speciation profile - speciation = pd.read_csv(speciation, sep=';') - del speciation['Copert_V_name'] - - 
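Before speciation, transform_df above unpivots the per-timestep pollutant columns ('<pollutant>_<tstep>') into rows keyed by a tstep column. With hypothetical column names, the same reshape can also be expressed with pandas.wide_to_long; this is only an equivalent sketch, not the removed implementation:

import pandas as pd

df = pd.DataFrame({'Link_ID': [1, 2],
                   'nox_no2_0': [0.4, 0.1],
                   'nox_no2_1': [0.5, 0.2]})
# One row per (Link_ID, tstep) instead of one column per timestep.
long_df = pd.wide_to_long(df, stubnames='nox_no2', i='Link_ID', j='tstep',
                          sep='_').reset_index()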
# Transform dataset into timestep rows instead of timestep columns - df = self.transform_df(df) - - # Makes input list from input dataframe - in_list = list(df.columns.values) - in_columns = ['Link_ID', 'Fleet_Code', 'tstep'] - for in_col in in_columns: - try: - in_list.remove(in_col) - except: - print 'ERROR', in_col - - df_out_list = [] - - # PMC - if not set(speciation.columns.values).isdisjoint(pmc_list): - out_p = set(speciation.columns.values).intersection(pmc_list).pop() - speciation_by_in_p = speciation.loc[:, [out_p] + ['CODE_HERMESv3']] - - speciation_by_in_p.rename(columns={out_p: 'f_{0}'.format(out_p)}, inplace=True) - df_aux = df.loc[:, ['pm10', 'pm25', 'Fleet_Code', 'tstep', 'Link_ID']] - df_aux = df_aux.merge(speciation_by_in_p, left_on='Fleet_Code', right_on='CODE_HERMESv3', how='left') - try: - del df['CODE_HERMESv3'] - del df['index_right'] - except: - pass - - df_aux.loc[:, out_p] = df_aux['pm10'] - df_aux['pm25'] - if self.output_type == 'R-LINE': - # from g/km.h to g/m.s - df_aux.loc[:, out_p] = df_aux.loc[:, out_p] / (1000 * 3600) - elif self.output_type == 'CMAQ': - # from g/km.h to mol/km.s - df_aux.loc[:, out_p] = df_aux.loc[:, out_p] / 3600 - elif self.output_type == 'MONARCH': - # from g/km.h to Kg/km.s - df_aux.loc[:, out_p] = df_aux.loc[:, out_p] / (1000 * 3600) - - df_out_list.append(df_aux.loc[:, [out_p] + ['tstep', 'Link_ID']].groupby(['tstep', 'Link_ID']).sum()) - del df_aux[out_p] - - for in_p in in_list: - # Get output list involved on that input pollutant - out_list = list(map.loc[map['src'] == in_p, 'dst'].unique()) - # Selecting only necessary speciation profiles - speciation_by_in_p = speciation.loc[:, out_list + ['CODE_HERMESv3']] - - # Adding "f_" in the formula column names - for p in out_list: - speciation_by_in_p.rename(columns={p: 'f_{0}'.format(p)}, inplace=True) - # Getting a slice of the full dataset to be merged - df_aux = df.loc[:, [in_p] + ['Fleet_Code', 'tstep', 'Link_ID']] - df_aux = df_aux.merge(speciation_by_in_p, left_on='Fleet_Code', right_on='CODE_HERMESv3', how='left') - try: - # Cleaning dataframe - del df['CODE_HERMESv3'] - del df['index_right'] - except: - pass - # Renaming pollutant columns by adding "old_" to the beginning. 
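All of the conversions in this speciation step start from emissions in g/km.h per road link. A compact summary of the divisors used above (mol_w is the species molecular weight in g/mol; whether a species counts as an aerosol follows the aerosols list used elsewhere in the removed module; the coarse-PM term is the special case that skips the molecular weight):

def to_output_units(val_g_km_h, output_type, mol_w, is_aerosol=False):
    # val_g_km_h: emission in g/km.h; mol_w: molecular weight in g/mol.
    # Divisors mirror the speciation loop above (pmc skips mol_w entirely).
    if output_type == 'R-LINE':
        return val_g_km_h / (1000. * 3600.)              # g/m.s
    if output_type == 'CMAQ':
        return val_g_km_h / (3600. * mol_w)              # mol/km.s (g/km.s for aerosols)
    if output_type == 'MONARCH':
        if is_aerosol:
            return val_g_km_h / (1000. * 3600. * mol_w)  # kg/km.s
        return val_g_km_h / (3600. * mol_w)              # mol/km.s
    return val_g_km_h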
- df_aux.rename(columns={in_p: 'old_{0}'.format(in_p)}, inplace=True) - for p in out_list: - if in_p is not np.nan: - if in_p != 0: - df_aux.loc[:, p] = df_aux['old_{0}'.format(in_p)].multiply(df_aux['f_{0}'.format(p)]) - try: - mol_w = self.molecular_weigths.loc[self.molecular_weigths['Specie'] == in_p, 'MW'].values[0] - except IndexError: - raise AttributeError('{0} not found in the molecular weights file.'.format(in_p)) - - if self.output_type == 'R-LINE': - # from g/km.h to g/m.s - df_aux.loc[:, p] = df_aux.loc[:, p] / (1000 * 3600) - elif self.output_type == 'CMAQ': - # from g/km.h to mol/km.s or g/km.s (aerosols) - df_aux.loc[:, p] = df_aux.loc[:, p] / (3600 * mol_w) - elif self.output_type == 'MONARCH': - if p.lower() in aerosols: - # from g/km.h to kg/km.s - df_aux.loc[:, p] = df_aux.loc[:, p] / (1000 * 3600 * mol_w) - else: - # from g/km.h to mol/km.s - df_aux.loc[:, p] = df_aux.loc[:, p] / (3600 * mol_w) - else: - df_aux.loc[:, p] = 0 - - df_out_list.append(df_aux.loc[:, [p] + ['tstep', 'Link_ID']].groupby(['tstep', 'Link_ID']).sum()) - del df_aux[p] - del df_aux - del df[in_p] - - df_out = pd.concat(df_out_list, axis=1) - return df_out - - def calculate_traffic_line_emissions(self, do_hot=True, do_cold=True, do_tyre_wear=True, do_brake_wear=True, do_road_wear=True, - do_resuspension=True, do_evaporative=False, do_other_cities=False): - df_accum = pd.DataFrame() - - if do_hot: - df_accum = pd.concat([df_accum, self.compact_hot_expanded(self.calculate_hot())]).groupby(['tstep', 'Link_ID']).sum() - # df_accum = pd.concat([df_accum, self.compact_hot_expanded(self.calculate_hot())]).groupby(['tstep', 'Link_ID']).sum() - if do_cold: - df_accum = pd.concat([df_accum, self.calculate_cold(self.calculate_hot())]).groupby(['tstep', 'Link_ID']).sum() - if do_tyre_wear: - df_accum = pd.concat([df_accum, self.calculate_tyre_wear()]).groupby(['tstep', 'Link_ID']).sum() - if do_brake_wear: - df_accum = pd.concat([df_accum, self.calculate_brake_wear()]).groupby(['tstep', 'Link_ID']).sum() - if do_road_wear: - df_accum = pd.concat([df_accum, self.calculate_road_wear()]).groupby(['tstep', 'Link_ID']).sum() - if do_resuspension: - df_accum = pd.concat([df_accum, self.calculate_resuspension()]).groupby(['tstep', 'Link_ID']).sum() - - df_accum = df_accum.reset_index().merge(self.road_links.loc[:, ['Link_ID', 'geometry']], left_on='Link_ID', right_on='Link_ID', how='left') - df_accum = gpd.GeoDataFrame(df_accum, crs=self.crs) - df_accum.set_index(['Link_ID', 'tstep'], inplace=True) - return df_accum - - def links_to_grid(self, link_emissions, grid_shape): - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - link_emissions.reset_index(inplace=True) - if not os.path.exists(self.link_to_grid_csv): - link_emissions_aux = link_emissions.loc[link_emissions['tstep'] == 0, :] - link_emissions_aux = link_emissions_aux.to_crs(grid_shape.crs) - - link_emissions_aux = gpd.sjoin(link_emissions_aux, grid_shape, how="inner", op='intersects') - link_emissions_aux = link_emissions_aux.loc[:, ['Link_ID', 'geometry', 'FID']] - link_emissions_aux = link_emissions_aux.merge(grid_shape.loc[:, ['FID', 'geometry']], left_on='FID', right_on='FID', how='left') - - length_list = [] - link_id_list = [] - fid_list = [] - count = 1 - for i, line in link_emissions_aux.iterrows(): - # print "{0}/{1}".format(count, len(link_emissions_aux)) - count += 1 - aux = line.get('geometry_x').intersection(line.get('geometry_y')) - if not aux.is_empty: - link_id_list.append(line.get('Link_ID')) - 
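links_to_grid above distributes each link's emission over the grid cells it crosses: links are spatially joined to cells, the length of every link/cell intersection is stored in km, and the per-km emission rates are later multiplied by those lengths and summed per cell. A trimmed geopandas sketch of the overlay step (column names follow the removed code, 'FID' being the cell id; both frames are assumed to share a metric CRS):

import geopandas as gpd

def link_lengths_by_cell(links, cells):
    # links: GeoDataFrame with 'Link_ID' and LineString geometries.
    # cells: GeoDataFrame with 'FID' and polygon geometries, same CRS.
    joined = gpd.sjoin(links[['Link_ID', 'geometry']],
                       cells[['FID', 'geometry']],
                       how='inner', op='intersects')   # same sjoin call as above
    joined = joined.merge(cells[['FID', 'geometry']], on='FID',
                          suffixes=('', '_cell'))
    # Length (km) of the part of each link that falls inside each cell.
    joined['length'] = [line.intersection(cell).length / 1000.0
                        for line, cell in zip(joined['geometry'],
                                              joined['geometry_cell'])]
    return joined[['Link_ID', 'FID', 'length']]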
fid_list.append(line.get('FID')) - length_list.append(aux.length / 1000) - - link_grid = pd.DataFrame({'Link_ID': link_id_list, 'FID': fid_list, 'length': length_list}) - - # link_grid.to_csv(self.link_to_grid_csv) - else: - link_grid = pd.read_csv(self.link_to_grid_csv) - - del grid_shape['geometry'], link_emissions['geometry'] - - link_grid = link_grid.merge(link_emissions, left_on='Link_ID', right_on='Link_ID') - try: - del link_grid['Unnamed: 0'] - except: - pass - del link_grid['Link_ID'] - - # print link_grid - - cols_to_update = list(link_grid.columns.values) - cols_to_update.remove('length') - cols_to_update.remove('tstep') - cols_to_update.remove('FID') - for col in cols_to_update: - # print col - link_grid.loc[:, col] = link_grid[col] * link_grid['length'] - del link_grid['length'] - - link_grid = link_grid.groupby(['tstep', 'FID']).sum() - link_grid.reset_index(inplace=True) - - link_grid_list = settings.comm.gather(link_grid, root=0) - if settings.rank == 0: - link_grid = pd.concat(link_grid_list) - link_grid = link_grid.groupby(['tstep', 'FID']).sum() - # link_grid.sort_index(inplace=True) - link_grid.reset_index(inplace=True) - - emission_list = [] - out_poll_names = list(link_grid.columns.values) - out_poll_names.remove('tstep') - out_poll_names.remove('FID') - - for p in out_poll_names: - # print p - data = np.zeros((self.timestep_num, len(grid_shape))) - for tstep in xrange(self.timestep_num): - data[tstep, link_grid.loc[link_grid['tstep'] == tstep, 'FID']] = \ - link_grid.loc[link_grid['tstep'] == tstep, p] - # data[tstep, link_grid.index] = link_grid['{0}_{1}'.format(p, tstep)] - # print p, data.sum() - # TODO Check units MARC - dict_aux = { - 'name': p, - 'units': None, - 'data': data - } - - if self.output_type == 'R-LINE': - # from g/km.h to g/m.s - pass - elif self.output_type == 'CMAQ': - # from g/km.h to mol/km.s - if p.lower() in aerosols: - dict_aux['units'] = '0.001 kg.s-1' - else: - dict_aux['units'] = 'kat' - elif self.output_type == 'MONARCH': - if p.lower() in aerosols: - dict_aux['units'] = 'kg.s-1' - else: - dict_aux['units'] = 'kat' - emission_list.append(dict_aux) - if settings.log_level_3: - print 'TIME -> Traffic.links_to_grid: {0} s'.format(round(gettime() - st_time, 2)) - - return emission_list - else: - return None - - def links_to_grid_new(self, link_emissions, grid_shape): - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - - if not os.path.exists(self.link_to_grid_csv): - link_emissions.reset_index(inplace=True) - link_emissions_aux = link_emissions.loc[link_emissions['tstep'] == 0, :] - link_emissions_aux = link_emissions_aux.to_crs(grid_shape.crs) - - link_emissions_aux = gpd.sjoin(link_emissions_aux, grid_shape, how="inner", op='intersects') - link_emissions_aux = link_emissions_aux.loc[:, ['Link_ID', 'geometry', 'FID']] - link_emissions_aux = link_emissions_aux.merge(grid_shape.loc[:, ['FID', 'geometry']], left_on='FID', right_on='FID', how='left') - - length_list = [] - link_id_list = [] - fid_list = [] - count = 1 - for i, line in link_emissions_aux.iterrows(): - # print "{0}/{1}".format(count, len(link_emissions_aux)) - count += 1 - aux = line.get('geometry_x').intersection(line.get('geometry_y')) - if not aux.is_empty: - link_id_list.append(line.get('Link_ID')) - fid_list.append(line.get('FID')) - length_list.append(aux.length / 1000) - - link_grid = pd.DataFrame({'Link_ID': link_id_list, 'FID': fid_list, 'length': length_list}) - - # link_grid.to_csv(self.link_to_grid_csv) - else: - link_grid = 
pd.read_csv(self.link_to_grid_csv) - - del grid_shape['geometry'], link_emissions['geometry'] - - link_grid = link_grid.merge(link_emissions, left_on='Link_ID', right_on='Link_ID') - - try: - del link_grid['Unnamed: 0'] - except: - pass - del link_grid['Link_ID'] - - p_list = [e for e in list(link_grid.columns.values) if e not in ('length', 'tstep', 'FID')] - - link_grid.loc[:, p_list] = link_grid[p_list].multiply(link_grid['length'], axis=0) - - del link_grid['length'] - - link_grid = link_grid.groupby(['FID', 'tstep']).sum() - - return link_grid - - @staticmethod - def nearest(row, geom_union, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None): - """Finds the nearest point and return the corresponding value from specified column. - https://automating-gis-processes.github.io/2017/lessons/L3/nearest-neighbour.html#nearest-points-using-geopandas - """ - - # Find the geometry that is closest - nearest = df2[geom2_col] == nearest_points(row[geom1_col], geom_union)[1] - # Get the corresponding value from df2 (matching is based on the geometry) - value = df2[nearest][src_column].get_values()[0] - return value - - @staticmethod - def write_rline(emissions, output_dir, start_date): - from datetime import timedelta - # emissions = emissions.head(5) - # print emissions - # print len(emissions) - - emissions_list = settings.comm.gather(emissions, root=0) - if settings.rank == 0: - emissions = pd.concat(emissions_list) - - p_list = list(emissions.columns.values) - p_list.remove('tstep') - p_list.remove('Link_ID') - p_list.remove('geometry') - for p in p_list: - link_list = ['L_{0}'.format(x) for x in list(pd.unique(emissions['Link_ID']))] - out_df = pd.DataFrame(columns=["Year", "Mon", "Day", "JDay", "Hr"] + link_list) - for tstep, aux in emissions.loc[:, ['tstep', 'Link_ID', p]].groupby('tstep'): - # out_ds = pd.Series( - # columns=["Year", "Mon", "Day", "JDay", "Hr"] + list(pd.unique(emissions['Link_ID']))) - # aux_df = aux.copy() - aux_date = start_date + timedelta(hours=tstep) - # print out_df - out_df.loc[tstep, 'Year'] = aux_date.strftime('%y') - out_df.loc[tstep, 'Mon'] = aux_date.month - out_df.loc[tstep, 'Day'] = aux_date.day - out_df.loc[tstep, 'JDay'] = aux_date.strftime('%j') - out_df.loc[tstep, 'Hr'] = aux_date.hour - out_df.loc[tstep, link_list] = aux.loc[:, [p]].transpose().values - - out_df.to_csv(os.path.join(output_dir, 'rline_{1}_{0}.csv'.format(p, start_date.strftime('%Y%m%d'))), index=False) - - settings.comm.Barrier() - return True - - def write_rline_roadlinks(self, df_in): - # df_out = pd.DataFrame() - - df_in_list = settings.comm.gather(df_in, root=0) - if settings.rank == 0: - df_in = pd.concat(df_in_list) - - df_out = pd.DataFrame( - columns=['Group', 'X_b', 'Y_b', 'Z_b', 'X_e', 'Y_e', 'Z_e', 'dCL', 'sigmaz0', '#lanes', - 'lanewidth', 'Emis', 'Hw1', 'dw1', 'Hw2', 'dw2', 'Depth', 'Wtop', 'Wbottom', - 'l_bh2sw', 'l_avgbh', 'l_avgbdensity', 'l_bhdev', 'X0_af', 'X45_af', - 'X90_af', 'X135_af', 'X180_af', 'X225_af', 'X270_af', 'X315_af', 'l_maxbh', 'Link_ID']) - df_err_list = [] - - df_in = df_in.to_crs({u'units': u'm', u'no_defs': True, u'ellps': u'intl', u'proj': u'utm', u'zone': 31}) - if rline_shp: - gpd.GeoDataFrame().to_file - df_in.to_file(os.path.join(self.output_dir, 'roads.shp')) - - count = 0 - for i, line in df_in.iterrows(): - try: - df_out.loc[count] = pd.Series({ - 'Group': 'G1', - 'X_b': round(line.get('geometry').coords[0][0], 3), - 'Y_b': round(line.get('geometry').coords[0][1], 3), - 'Z_b': 1, - 'X_e': 
round(line.get('geometry').coords[-1][0], 3), - 'Y_e': round(line.get('geometry').coords[-1][1], 3), - 'Z_e': 1, - 'dCL': 0, - 'sigmaz0': 2, - '#lanes': 3, - 'lanewidth': 2.5, - 'Emis': 1, - 'Hw1': 0, - 'dw1': 0, - 'Hw2': 0, - 'dw2': 0, - 'Depth': 0, - 'Wtop': 0, - 'Wbottom': 0, - 'l_bh2sw': round(line.get('bh_2_sw'), 3), - 'l_avgbh': round(line.get('mean_heigh'), 3), - 'l_avgbdensity': round(line.get('area_densi'), 3), - 'l_bhdev': round(line.get('sd_height'), 3), - 'X0_af': round(line.get('af_0'), 3), - 'X45_af': round(line.get('af_45'), 3), - 'X90_af': round(line.get('af_90'), 3), - 'X135_af': round(line.get('af_135'), 3), - 'X180_af': round(line.get('af_180'), 3), - 'X225_af': round(line.get('af_225'), 3), - 'X270_af': round(line.get('af_270'), 3), - 'X315_af': round(line.get('af_315'), 3), - 'l_maxbh': round(line.get('max_height'), 3), - 'Link_ID': line.get('Link_ID'), - }) - count += 1 - except: - # df_err_list.append(line) - pass - - df_out.set_index('Link_ID', inplace=True) - df_out.sort_index(inplace=True) - df_out.to_csv(os.path.join(self.output_dir, 'roads.txt'), index=False, sep=' ') - settings.comm.Barrier() - - return True - - -if __name__ == '__main__': - from datetime import datetime - - t = Traffic('/home/Earth/ctena/Models/HERMESv3/IN/data/traffic/road_links/BCN/road_links_BCN.shp', - '/home/Earth/ctena/Models/HERMESv3/IN/data/traffic/fleet_compo', - '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/temporal/traffic/speed_hourly.csv', - '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/temporal/traffic/aadt_m_mn.csv', - '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/temporal/traffic/aadt_week.csv', - '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/temporal/traffic/aadt_h_mn.csv', - '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/temporal/traffic/aadt_h_wd.csv', - '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/temporal/traffic/aadt_h_sat.csv', - '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/temporal/traffic/aadt_h_sun.csv', - '/home/Earth/ctena/Models/HERMESv3/IN/data/traffic/ef', - #['nox_no2', 'nh3'], - ['nox_no2'], - datetime(year=2015, month=01, day=31), load=0.5, timestep_type='hourly', timestep_num=2, timestep_freq=1, - temp_common_path='/esarchive/recon/ecmwf/era5/1hourly/tas/') - - t.calculate_traffic_line_emissions() - print t.tyre_wear - print t.brake_wear - print t.road_wear - # del hot_expanded['geometry'] - # hot_expanded = hot_expanded.loc[(hot_expanded['Fleet_Code'] == 'PCG_11') | (hot_expanded['Fleet_Code'] == 'PCG_12'), :] - # hot_expanded.to_csv('/home/Earth/ctena/Models/HERMESv3/OUT/testing.csv') - - # cold_links = t.road_links.copy() - # print cold_links.columns.values - # - # cold_links.loc[:, 'centroid'] = cold_links['geometry'].centroid - # - # temperature = t.read_temperature() - # - # unary_union = temperature.unary_union - # - # cold_links['nearest_id'] = cold_links.apply(t.nearest, geom_union=unary_union, df1=cold_links, df2=temperature, geom1_col='centroid', - # src_column='t_0', axis=1) - # - # print cold_links - # print temperature diff --git a/hermesv3_bu/modules/traffic/traffic_area.py b/hermesv3_bu/modules/traffic/traffic_area.py deleted file mode 100644 index 9a2daabed928150ce7239d1d536f3f012811e535..0000000000000000000000000000000000000000 --- a/hermesv3_bu/modules/traffic/traffic_area.py +++ /dev/null @@ -1,584 +0,0 @@ -#!/usr/bin/env python - -""" -Copyright 2018 Earth Sciences Department, BSC-CNS - - This file is part of HERMESv3. 
- - HERMESv3 is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - HERMESv3 is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with HERMESv3. If not, see . -""" - -__author__ = "Carles Tena" -__copyright__ = "Copyright 2018" -__email__ = "carles.tena@bsc.es" -__license__ = "GNU General Public License" -__maintainer__ = "Carles Tena" -__version__ = "3.3.2" - -from memory_profiler import profile - -import sys -import os -from timeit import default_timer as gettime - -parentPath = '/home/Earth/ctena/Models/HERMESv3/IN' -if parentPath not in sys.path: - sys.path.insert(0, parentPath) - -import IN.src.config.settings as settings -from IN.src.modules.bottomup.traffic.traffic import Traffic -import geopandas as gpd -import pandas as pd -import numpy as np -from datetime import datetime - -pmc_list = ['pmc', 'PMC'] - -class TrafficArea(object): - def __init__(self, global_path, auxiliary_dir, - do_evaporative=True, gasoline_path=None, total_pop_by_prov=None, nuts_shapefile=None, - do_small_cities=True, small_cities_shp=None): - try: - settings.log_level_3 - except AttributeError: - settings.define_global_vars(3) - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - self.auxiliary_dir = auxiliary_dir - - if not os.path.exists(os.path.join(auxiliary_dir, 'population')): - os.makedirs(os.path.join(auxiliary_dir, 'population')) - - if do_evaporative: - self.evaporative = self.init_evaporative(global_path, auxiliary_dir, nuts_shapefile, gasoline_path, total_pop_by_prov) - - if do_small_cities: - self.small_cities = self.init_small_citites(global_path, auxiliary_dir, small_cities_shp) - - if settings.log_level_3: - print 'TIME -> TrafficArea.__init__: {0} s'.format(round(gettime() - st_time, 2)) - - return None - - def init_evaporative(self, global_path, auxiliary_dir, provinces_shapefile, gasoline_path, total_pop_by_prov): - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - if not os.path.exists(os.path.join(auxiliary_dir, 'vehicle_by_cell.shp')): - grid_shape_path = os.path.join(auxiliary_dir, 'shapefile', 'grid_shapefile.shp') - pop = self.get_clipped_population(global_path, grid_shape_path, - os.path.join(auxiliary_dir, 'population', 'population.shp')) - pop_nut = self.make_population_by_nuts(pop, provinces_shapefile, - os.path.join(auxiliary_dir, 'population', 'pop_NUT.shp')) - pop_nut_cell = self.make_population_by_nuts_cell(pop_nut, grid_shape_path, - os.path.join(auxiliary_dir, 'population', - 'pop_NUT_cell.shp')) - veh_cell = self.make_vehicles_by_cell(pop_nut_cell, gasoline_path, pd.read_csv(total_pop_by_prov), grid_shape_path, - os.path.join(auxiliary_dir, 'vehicle_by_cell.shp')) - else: - veh_cell = gpd.read_file(os.path.join(auxiliary_dir, 'vehicle_by_cell.shp')) - - if settings.log_level_3: - print 'TIME -> TrafficArea.init_evaporative: {0} s'.format(round(gettime() - st_time, 2)) - - return veh_cell - - def init_small_citites(self, global_path, auxiliary_dir, small_cities_shapefile): - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - if not 
os.path.exists(os.path.join(auxiliary_dir, 'population', 'pop_SMALL_cell.shp')): - grid_shape_path = os.path.join(auxiliary_dir, 'shapefile', 'grid_shapefile.shp') - pop = self.get_clipped_population(global_path, grid_shape_path, - os.path.join(auxiliary_dir, 'population', 'population.shp')) - pop_nut = self.make_population_by_nuts(pop, small_cities_shapefile, - os.path.join(auxiliary_dir, 'population', 'pop_SMALL.shp')) - pop_nut_cell = self.make_population_by_nuts_cell(pop_nut, grid_shape_path, - os.path.join(auxiliary_dir, 'population', - 'pop_SMALL_cell.shp')) - else: - pop_nut_cell = gpd.read_file(os.path.join(auxiliary_dir, 'population', 'pop_SMALL_cell.shp')) - - if settings.log_level_3: - print 'TIME -> TrafficArea.init_small_citites: {0} s'.format(round(gettime() - st_time, 2)) - - return pop_nut_cell - - def get_clipped_population(self, global_path, to_clip_shapefile, population_shapefile_path): - from IN.src.tools.raster import Raster - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - if not os.path.exists(population_shapefile_path): - if settings.rank == 0: - Raster(Raster(global_path).clip_raster_with_shapefile(to_clip_shapefile, os.path.join(self.auxiliary_dir, 'population', 'pop.tiff'))).to_shapefile(out_path=population_shapefile_path) - - df = gpd.read_file(population_shapefile_path) - - if settings.log_level_3: - print 'TIME -> TrafficArea.get_clipped_population: {0} s'.format(round(gettime() - st_time, 2)) - - return df - - def make_population_by_nuts(self, population_shape, nut_shp, pop_by_nut_path, write_file=True, csv_path=None, column_id='ORDER07'): - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - if not os.path.exists(pop_by_nut_path): - nut_df = gpd.read_file(nut_shp) - population_shape['area_in'] = population_shape.geometry.area - df = gpd.overlay(population_shape, nut_df.to_crs(population_shape.crs), how='intersection') - df.crs = population_shape.crs - df.loc[:, 'data'] = df['data'] * (df.geometry.area / df['area_in']) - del df['area_in'] - if write_file: - df.to_file(pop_by_nut_path) - if csv_path is not None: - df = df.loc[:, ['data', column_id]].groupby(column_id).sum() - df.to_csv(csv_path) - else: - df = gpd.read_file(pop_by_nut_path) - - if settings.log_level_3: - print 'TIME -> TrafficArea.make_population_by_nuts: {0} s'.format(round(gettime() - st_time, 2)) - - return df - - def make_population_by_nuts_cell(self, pop_by_nut, grid_shp_path, pop_nut_cell_path, write_file=True): - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - # pop_nut_cell_path = os.path.join(self.auxiliary_dir, 'population', 'pop_NUT_cell.shp') - - if not os.path.exists(pop_nut_cell_path): - - grid_shp = gpd.read_file(grid_shp_path) - - pop_by_nut = pop_by_nut.to_crs(grid_shp.crs) - - del pop_by_nut['NAME'] - pop_by_nut['area_in'] = pop_by_nut.geometry.area - - df = gpd.overlay(pop_by_nut, grid_shp, how='intersection') - df.crs = grid_shp.crs - df.loc[:, 'data'] = df['data'] * (df.geometry.area / df['area_in']) - del pop_by_nut['area_in'] - if write_file: - df.to_file(pop_nut_cell_path) - else: - df = gpd.read_file(pop_nut_cell_path) - - if settings.log_level_3: - print 'TIME -> TrafficArea.make_population_by_nuts_cell: {0} s'.format(round(gettime() - st_time, 2)) - - return df - - def make_vehicles_by_cell(self, pop_nut_cell, gasoline_path, total_pop_by_nut, grid_shape_path, veh_by_cell_path, column_id='ORDER07'): - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - # 
veh_by_cell_path = os.path.join(self.auxiliary_dir, 'vehicle_by_cell.shp') - - if not os.path.exists(veh_by_cell_path): - total_pop_by_nut.loc[:, column_id] = total_pop_by_nut[column_id].astype(np.int16) - pop_nut_cell.loc[:, column_id] = pop_nut_cell[column_id].astype(np.int16) - - df = pop_nut_cell.merge(total_pop_by_nut, left_on=column_id, right_on=column_id, how='left') - - df['pop_percent'] = df['data_x'] / df['data_y'] - del df['data_x'], df['data_y'], df['CELL_ID'] - - gas_df = pd.read_csv(gasoline_path, index_col='COPERT_V_name').transpose() - vehicle_type_list = list(gas_df.columns.values) - gas_df.loc[:, column_id] = gas_df.index.astype(np.int16) - - df = df.merge(gas_df, left_on=column_id, right_on=column_id, how='left') - for vehicle_type in vehicle_type_list: - df.loc[:, vehicle_type] = df[vehicle_type] * df['pop_percent'] - - del df['pop_percent'], df[column_id] - - aux_df = df.loc[:, ['FID'] + vehicle_type_list].groupby('FID').sum() - aux_df.loc[:, 'FID'] = aux_df.index - grid_shape = gpd.read_file(grid_shape_path) - geom = grid_shape.loc[aux_df.index, 'geometry'] - - df = gpd.GeoDataFrame(aux_df, geometry=geom, crs=pop_nut_cell.crs) - - df.to_file(veh_by_cell_path) - - else: - df = gpd.read_file(veh_by_cell_path) - - if settings.log_level_3: - print 'TIME -> TrafficArea.make_vehicles_by_cell: {0} s'.format(round(gettime() - st_time, 2)) - - return df - - @staticmethod - def get_profiles_from_temperature(temperature, default=False): - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - temperature = temperature.copy() - if default: - default_profile = np.array([0.025,0.025,0.025,0.025,0.025,0.027083,0.03125,0.0375,0.045833,0.05625,0.060417,0.066667,0.06875,0.072917,0.070833,0.064583,0.05625,0.045833,0.0375,0.03125,0.027083,0.025,0.025,0.025]) - for x in xrange(24): - temperature['t_{0}'.format(x)] = default_profile[x] - - else: - temp_list = ['t_{0}'.format(x) for x in xrange(24)] - temperature.loc[:, temp_list] = temperature[temp_list] + 273.15 - - temperature.loc[:, temp_list] = temperature[temp_list].subtract(temperature[temp_list].min(axis=1), axis=0) - - temperature.loc[:, temp_list] = temperature[temp_list].div(temperature[temp_list].max(axis=1) - temperature[temp_list].min(axis=1), axis=0) - - aux = temperature[temp_list].replace({0: np.nan}) - second_min = aux[temp_list].min(axis=1) - - temperature.loc[:, temp_list] = temperature[temp_list].add(second_min, axis=0) - temperature.loc[:, temp_list] = temperature[temp_list].div(temperature[temp_list].sum(axis=1), axis=0) - - if settings.log_level_3: - print 'TIME -> TrafficArea.get_profiles_from_temperature: {0} s'.format(round(gettime() - st_time, 2)) - - return temperature - - def calculate_evaporative_emissions(self, temperature_dir, ef_file, date, tstep_num, tstep_frq, speciation_map_path, - speciation_profile_path): - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - veh_list = list(self.evaporative.columns.values) - veh_list.remove('FID') - veh_list.remove('geometry') - if 'T_REC' in veh_list: - veh_list.remove('T_REC') - - crs = self.evaporative.crs - geom = self.evaporative.geometry - - # get average daily temperature by cell - aux_df = self.evaporative.loc[:, 'geometry'].to_crs({'init': 'epsg:4326'}) - self.evaporative['c_lat'] = aux_df.centroid.y - self.evaporative['c_lon'] = aux_df.centroid.x - self.evaporative['centroid'] = aux_df.centroid - - temperature = Traffic.read_temperature(self.evaporative['c_lon'].min(), self.evaporative['c_lon'].max(), - 
self.evaporative['c_lat'].min(), self.evaporative['c_lat'].max(), - temperature_dir, date.replace(hour=0, minute=0, second=0, microsecond=0), - 24, 1) - - temperature_mean = gpd.GeoDataFrame(temperature[['t_{0}'.format(x) for x in xrange(24)]].mean(axis=1), columns=['temp'], geometry=temperature.geometry) - temperature_mean['REC'] = temperature['REC'] - - if 'T_REC' not in self.evaporative.columns.values: - self.evaporative['T_REC'] = self.evaporative.apply(Traffic.nearest, geom_union=temperature_mean.unary_union, - df1=self.evaporative, df2=temperature_mean, geom1_col='centroid', - src_column='REC', axis=1) - del self.evaporative['c_lat'], self.evaporative['c_lon'], self.evaporative['centroid'] - - self.evaporative.to_file(os.path.join(self.auxiliary_dir, 'vehicle_by_cell.shp')) - else: - del self.evaporative['c_lat'], self.evaporative['c_lon'], self.evaporative['centroid'] - - self.evaporative = self.evaporative.merge(temperature_mean, left_on='T_REC', right_on='REC', how='left') - - ef_df = pd.read_csv(ef_file, sep=';') - del ef_df['canister'], ef_df['Copert_V_name'] - ef_df.loc[ef_df['Tmin'].isnull(), 'Tmin'] = -999 - ef_df.loc[ef_df['Tmax'].isnull(), 'Tmax'] = 999 - - for vehicle_type in veh_list: - - self.evaporative['EF'] = np.nan - ef_aux = ef_df.loc[ef_df['CODE_HERMESv3'] == vehicle_type] - for i, line in ef_aux.iterrows(): - self.evaporative.loc[(self.evaporative['temp'] < line.get('Tmax')) & (self.evaporative['temp'] >= line.get('Tmin')), 'EF'] = line.get('EFbase') * line.get('TF') - - self.evaporative.loc[:, vehicle_type] = self.evaporative[vehicle_type] * self.evaporative['EF'] - - self.evaporative.loc[:, 'nmvoc'] = self.evaporative.loc[:, veh_list].sum(axis=1) - self.evaporative = gpd.GeoDataFrame(self.evaporative.loc[:, ['nmvoc', 'T_REC', 'FID']], geometry=geom, crs=crs) - - # TODO change units function 3600 cell area - self.evaporative = self.speciate_evaporative(speciation_map_path, speciation_profile_path) - - self.evaporative = self.evaporative_temporal_distribution(self.get_profiles_from_temperature(temperature), date, tstep_num, tstep_frq) - - self.evaporative.set_index(['FID', 'tstep'], inplace=True) - if settings.log_level_3: - print 'TIME -> TrafficArea.calculate_evaporative_emissions: {0} s'.format(round(gettime() - st_time, 2)) - - return True - - def evaporative_temporal_distribution(self, temporal_profiles, date, tstep_num, tstep_frq): - from datetime import timedelta - aux = self.evaporative.merge(temporal_profiles, left_on='T_REC', right_on='REC', how='left') - # print aux - temporal_df_list = [] - pollutant_list = [e for e in self.evaporative.columns.values if e not in ('T_REC', 'FID', 'geometry')] - # print pollutant_list - - for tstep in xrange(tstep_num): - # print tstep - # print aux[pollutant_list] - # print aux['t_{0}'.format(date.hour)] - # print aux[pollutant_list] * 2 - # aux_temporal = aux[pollutant_list] * aux['t_{0}'.format(date.hour)] - - aux_temporal = aux[pollutant_list].multiply(aux['t_{0}'.format(date.hour)], axis=0) - aux_temporal['FID'] = aux['FID'] - aux_temporal['tstep'] = tstep - temporal_df_list.append(aux_temporal) - date = date + timedelta(hours=tstep_frq) - df = pd.concat(temporal_df_list) - return df - - def speciate_evaporative(self, map_path, profile_path): - - speciated_df = self.evaporative.drop(columns=['nmvoc']) - speciation_map = pd.read_csv(map_path, sep=';') - dst_pollutant_list = list(speciation_map.loc[speciation_map['src'] == 'nmvoc', 'dst'].values) - speciation_profile = pd.read_csv(profile_path, 
sep=';').iloc[0].drop(labels=['CODE_HERMESv3', 'Copert_V_name']) - - for p in dst_pollutant_list: - # From g/day to mol/day - speciated_df[p] = self.evaporative['nmvoc'] * speciation_profile.get(p) - return speciated_df - - def small_cities_emissions_by_population(self, df, ef_file): - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - df = df.loc[:, ['data', 'FID']].groupby('FID').sum() - # print pop_nut_cell - ef_df = pd.read_csv(ef_file, sep=';') - # print ef_df - ef_df.drop(['CODE_HERMESv3', 'Copert_V_name'], axis=1, inplace=True) - for pollutant in ef_df.columns.values: - # print ef_df[pollutant].iloc[0] - df[pollutant] = df['data'] * ef_df[pollutant].iloc[0] - df.drop('data', axis=1, inplace=True) - - if settings.log_level_3: - print 'TIME -> TrafficArea.small_cities_emissions_by_population: {0} s'.format(round(gettime() - st_time, 2)) - - return df - - def speciate_small_cities(self, small_cities, molecular_weights_path, speciation_map, speciation_profile): - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - molecular_weights = pd.read_csv(molecular_weights_path, sep=';') - speciation_map = pd.read_csv(speciation_map, sep=';') - speciation_profiles = pd.read_csv(speciation_profile, sep=';') - - in_p_list = list(small_cities.columns.values) - df = pd.DataFrame() - for in_p in in_p_list: - out_p_list = list(speciation_map.loc[speciation_map['src'] == in_p, 'dst'].values) - for out_p in out_p_list: - #from kg/year to mol/year (gases) or g/year (aerosols) - df[out_p] = small_cities[in_p] * (speciation_profiles[out_p].iloc[0] / 1000 * molecular_weights.loc[molecular_weights['Specie'] == in_p, 'MW'].values[0]) - if not set(speciation_profiles.columns.values).isdisjoint(pmc_list): - out_p = set(speciation_profiles.columns.values).intersection(pmc_list).pop() - try: - df[out_p] = small_cities['pm10'] - small_cities['pm25'] - except KeyError as e: - raise KeyError('{0} pollutant do not appear on the evaporative EF.'.format(e)) - - if settings.log_level_3: - print 'TIME -> TrafficArea.speciate_small_cities: {0} s'.format(round(gettime() - st_time, 2)) - - return df - - @staticmethod - def add_timezones(grid, default=False): - from timezonefinder import TimezoneFinder - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - if default: - grid['timezone'] = 'Europe/Madrid' - else: - tz = TimezoneFinder() - grid = gpd.read_file('/home/Earth/ctena/Models/HERMESv3/OUT/timezones.shp') - aux_grid = grid.to_crs({'init': 'epsg:4326'}) - aux_grid['lats'] = aux_grid.geometry.centroid.y - aux_grid['lons'] = aux_grid.geometry.centroid.x - print ' to timezone' - # print aux_grid.apply(lambda x: tz.timezone_at(lng=aux_grid['c_lons'], lat=aux_grid['c_lats']), axis=0) - # grid['timezone'] = aux_grid.apply(lambda x: tz.timezone_at(lng=x['lons'], lat=x['lats']), axis=1) - # grid.to_file('/home/Earth/ctena/Models/HERMESv3/OUT/timezones.shp') - inc = 1 - - while len(grid.loc[grid['timezone'] == '', :]) > 0: - print len(grid.loc[grid['timezone'] == '', :]) - grid.loc[grid['timezone'] == '', 'timezone'] = aux_grid.loc[grid['timezone'] == '', :].apply(lambda x: tz.closest_timezone_at(lng=x['lons'], lat=x['lats'], delta_degree=inc), axis=1) - inc += 1 - grid.to_file('/home/Earth/ctena/Models/HERMESv3/OUT/timezones_2.shp') - - if settings.log_level_3: - print 'TIME -> TrafficArea.add_timezones: {0} s'.format(round(gettime() - st_time, 2)) - - return grid - - def temporal_distribution_small(self, small_cities, grid, montly_profile, 
weekly_profile, hourly_profile, - starting_date, tstep_num, tstep_frq): - import pytz - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - p_names = small_cities.columns.values - - small_cities = small_cities.merge(grid.loc[:, ['timezone']], left_index=True, right_index=True, how='left') - - small_cities.loc[:, 'utc'] = starting_date - small_cities['date'] = small_cities.groupby('timezone')['utc'].apply( - lambda x: pd.to_datetime(x).dt.tz_localize(pytz.utc).dt.tz_convert(x.name).dt.tz_localize(None)) - small_cities.drop(['utc', 'timezone'], inplace=True, axis=1) - # print small_cities - - montly_profile = pd.read_csv(montly_profile, sep=',', index_col=0).T - weekly_profile = pd.read_csv(weekly_profile, sep=',', index_col=0).T - hourly_profile = pd.read_csv(hourly_profile, sep=',', index_col=0).T - - df_list = [] - for tstep in xrange(tstep_num): - small_cities['month'] = small_cities['date'].dt.month - small_cities['weekday'] = small_cities['date'].dt.dayofweek - small_cities['hour'] = small_cities['date'].dt.hour - small_cities.loc[small_cities['weekday'] <= 4, 'day_type'] = 'Weekday' - small_cities.loc[small_cities['weekday'] == 5, 'day_type'] = 'Saturday' - small_cities.loc[small_cities['weekday'] == 6, 'day_type'] = 'Sunday' - - for i, aux in small_cities.groupby(['month', 'weekday', 'hour', 'day_type']): - small_cities.loc[aux.index, 'f'] = montly_profile.loc[str(i[0]), 1] * \ - weekly_profile.loc[str(i[1]), 1] * \ - hourly_profile.loc[str(i[2]), i[3]] * \ - 1/3600 - - aux_df = small_cities.loc[:, p_names].multiply(small_cities['f'], axis=0) - aux_df['tstep'] = tstep - aux_df.set_index('tstep', append=True, inplace=True) - df_list.append(aux_df) - - small_cities['date'] = small_cities['date'] + pd.to_timedelta(tstep_frq, unit='h') - df = pd.concat(df_list) - - if settings.log_level_3: - print 'TIME -> TrafficArea.temporal_distribution_small: {0} s'.format(round(gettime() - st_time, 2)) - - return df - - def calculate_small_cities_emissions(self, ef_file, - molecular_weights_path, speciation_map, speciation_profile, - montly_profile, weekly_profile, hourly_profile, - starting_date, tstep_num, tstep_frq): - - if settings.log_level_3: - st_time = gettime() - else: - st_time = None - - # EF - self.small_cities = self.small_cities_emissions_by_population(self.small_cities, ef_file) - - # Spectiacion - self.small_cities = self.speciate_small_cities(self.small_cities, molecular_weights_path, speciation_map, speciation_profile) - # print len(small_cities) - # Temporal - grid = self.add_timezones(gpd.read_file(os.path.join(self.auxiliary_dir, 'shapefile', 'grid_shapefile.shp')), default=True) - self.small_cities = self.temporal_distribution_small(self.small_cities, grid, montly_profile, weekly_profile, hourly_profile, - starting_date, tstep_num, tstep_frq) - - if settings.log_level_3: - print 'TIME -> TrafficArea.calculate_small_cities_emissions: {0} s'.format(round(gettime() - st_time, 2)) - -if __name__ == '__main__': - tiff_path = '/home/Earth/ctena/Models/HERMESv3/IN/data/GHS_POP_GPW42015_GLOBE_R2015A_54009_1k_v1_0.tif' - aux_path = '/home/Earth/ctena/Models/HERMESv3/IN/data/auxiliar_files/lcc_4000.0_4000.0' - provinces_path = '/home/Earth/ctena/Models/HERMESv3/IN/data/Shapefiles/Provinces/ES_Provinces.shp' - small_cities_path = '/home/Earth/ctena/Models/HERMESv3/IN/data/Shapefiles/small_cities/small_cities.shp' - - gas_path = '/home/Earth/ctena/Models/HERMESv3/IN/data/traffic_area/gasoline_vehicles_provinces_2015.csv' - - ip_pop_by_prov = 
'/home/Earth/ctena/Models/HERMESv3/IN/data/traffic_area/population_by_mun.csv' - tas_dir = '/esarchive/recon/ecmwf/era5/original_files/reorder/1hourly/tas/' - evaporative_ef_file = '/home/Earth/ctena/Models/HERMESv3/IN/data/traffic_area/ef/evaporative_nmvoc.csv' - speciation_map = '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/speciation/traffic_area/map_cmaq.csv' - profile_evaporative = '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/speciation/traffic_area/evaporative_cmaq.csv' - - mol_weigths = '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/speciation/MolecularWeights.csv' - profile_small_cities = '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/speciation/traffic_area/small_cities_cmaq.csv' - small_cities_ef_file = '/home/Earth/ctena/Models/HERMESv3/IN/data/traffic_area/ef/small_cities.csv' - h_profile = '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/temporal/traffic_area/small_cities_hour.csv' - w_profile = '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/temporal/traffic_area/small_cities_week.csv' - m_profile = '/home/Earth/ctena/Models/HERMESv3/IN/data/profiles/temporal/traffic_area/small_cities_month.csv' - - date_to_simulate = datetime(year=2010, month=1, day=1, hour=0) - timestep_num = 49 - timestep_frq = 1 - - - - t = TrafficArea(tiff_path, aux_path, do_evaporative=True, gasoline_path=gas_path, total_pop_by_prov=ip_pop_by_prov, - nuts_shapefile=provinces_path, small_cities_shp=small_cities_path) - - t.calculate_evaporative_emissions(tas_dir, evaporative_ef_file, date_to_simulate, - timestep_num, timestep_frq, speciation_map, profile_evaporative) - t.calculate_small_cities_emissions(small_cities_ef_file, - mol_weigths, speciation_map, profile_small_cities, - m_profile, w_profile, h_profile, date_to_simulate, timestep_num, timestep_frq) - # print t.make_population_by_nuts(provinces_path, aux_path) - print t.evaporative diff --git a/hermesv3_bu/modules/writing/writer.py b/hermesv3_bu/modules/writing/writer.py deleted file mode 100644 index 06e6f34b4136aa34365b2f02fee263ce579477ed..0000000000000000000000000000000000000000 --- a/hermesv3_bu/modules/writing/writer.py +++ /dev/null @@ -1,604 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Earth Sciences Department, BSC-CNS -# -# This file is part of HERMESv3_GR. -# -# HERMESv3_GR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# HERMESv3_GR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with HERMESv3_GR. If not, see . - - -import sys -import timeit -import numpy as np -from mpi4py import MPI -from netCDF4 import Dataset -from hermesv3_gr.config import settings - - -class Writer(object): - """ - Class to Write the output file. - - :param path: Path to the destination file. - :type path: str - - :param grid: Grid of the destination file. - :type grid: Grid - - :param levels: List with the levels of the grid. - :type levels: list - - :param date: Date of the output file - :type date: datetime.datetime - - :param hours: List with the timestamp hours. - :type hours: list. - - :param global_attributes_path: Path to the file that contains the static global attributes. 
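Both TrafficArea.make_population_by_nuts and TrafficArea.make_population_by_nuts_cell above rely on the same area-weighted overlay trick: a value carried by the source polygons is split among the intersecting target polygons in proportion to the intersected area. A minimal, self-contained sketch of that pattern, assuming GeoDataFrames in a projected CRS and that only `src` carries the `data` column; function and variable names here are illustrative, not the HERMES implementation:

```python
import geopandas as gpd

def area_weighted_overlay(src, dst, value_col='data'):
    """Split src[value_col] among the polygons of dst by intersected area."""
    src = src.copy()
    dst = dst.to_crs(src.crs)
    src['area_in'] = src.geometry.area              # full area of each source polygon
    inter = gpd.overlay(src, dst, how='intersection')
    # every intersected piece keeps a share proportional to its own area
    inter[value_col] = inter[value_col] * (inter.geometry.area / inter['area_in'])
    return inter.drop(columns=['area_in'])

# e.g. pop_by_nut = area_weighted_overlay(population_shp, provinces_shp)
```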
- :type global_attributes_path: str - - :param compress: Indicates if you want to compress the netCDF variable data. - :type compress: bool - - :param parallel: Indicates if you want to write in parallel mode. - :type parallel. bool - """ - - def __init__(self, path, grid, levels, date, hours, global_attributes_path, compress=True, parallel=False): - - self.path = path - self.grid = grid - self.compress = compress - self.parallel = parallel - - self.variables_attributes = None - self.levels = levels - self.date = date - self.hours = hours - - self.global_attributes = None - - self.global_attributes_path = global_attributes_path - - def write(self, inventory_list): - """ - Write the netCDF4 file with the pollutants of the given list of inventories. - - :param inventory_list: List of inventories. - :type inventory_list: list - - :return: True at end - :rtype: bool - """ - st_time = timeit.default_timer() - settings.write_log('') - settings.write_log("Writing netCDF output file {0} .".format(self.path)) - - self.set_variable_attributes(inventory_list) - self.change_variable_attributes() - if self.parallel: - if settings.rank == 0: - self.create_parallel_netcdf() - settings.comm.Barrier() - self.write_parallel_netcdf(inventory_list) - else: - self.write_serial_netcdf(inventory_list) - - settings.write_time('Writer', 'write', timeit.default_timer() - st_time) - return True - - def change_variable_attributes(self): - pass - - def create_parallel_netcdf(self): - """ - Implemented on inner class. - """ - return None - - def write_parallel_netcdf(self, emission_list): - """ - Append the data to the netCDF4 file already created in parallel mode. - - :param emission_list: Data to append. - :type emission_list: list - - :return: True at end. - :rtype: bool - """ - - st_time = timeit.default_timer() - - settings.write_log("\tAppending data to parallel NetCDF file.", level=2) - if settings.size > 1: - netcdf = Dataset(self.path, mode='a', format="NETCDF4", parallel=True, comm=settings.comm, info=MPI.Info()) - else: - netcdf = Dataset(self.path, mode='a', format="NETCDF4") - settings.write_log("\t\tParallel NetCDF file ready to write.", level=2) - index = 0 - # print "Rank {0} 2".format(rank) - for var_name in self.variables_attributes.iterkeys(): - - data = self.calculate_data_by_var(var_name, emission_list, self.grid.shape) - st_time = timeit.default_timer() - index += 1 - - var = netcdf.variables[var_name] - if settings.size > 1: - var.set_collective(True) - # Correcting NAN - if data is None: - data = 0 - var[:, :, self.grid.x_lower_bound:self.grid.x_upper_bound, - self.grid.y_lower_bound:self.grid.y_upper_bound] = data - - settings.write_log("\t\t\t'{0}' variable filled".format(var_name)) - - if 'cell_area' in netcdf.variables: - c_area = netcdf.variables['cell_area'] - c_area[self.grid.x_lower_bound:self.grid.x_upper_bound, - self.grid.y_lower_bound:self.grid.y_upper_bound] = self.grid.cell_area - - netcdf.close() - settings.write_time('Writer', 'write_parallel_netcdf', timeit.default_timer() - st_time, level=3) - return True - - def write_serial_netcdf(self, emission_list): - """ - Implemented on inner class. - """ - return None - - def set_variable_attributes(self, inventory_list): - """ - Change the variables_attribute parameter of the Writer class. - - :param inventory_list: list of invenotries. - :type inventory_list: list - - :return: True at end. 
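The write_parallel_netcdf body above opens one shared file from every MPI rank and lets each rank fill only its own slab of each variable. A stripped-down sketch of that pattern, assuming a netCDF4-python/HDF5 stack built with parallel I/O support; the file name, variable name and slab bounds are placeholders:

```python
import numpy as np
from mpi4py import MPI
from netCDF4 import Dataset

comm = MPI.COMM_WORLD
nc = Dataset('emissions.nc', mode='a', format='NETCDF4',
             parallel=True, comm=comm, info=MPI.Info())

var = nc.variables['nox_no2']              # hypothetical pollutant variable
var.set_collective(True)                   # collective access, as in the code above

# per-rank slab limits; in the deleted code they come from the Grid object
x_lo, x_hi, y_lo, y_hi = 0, 10, 0, 10
var[:, :, x_lo:x_hi, y_lo:y_hi] = np.zeros((var.shape[0], var.shape[1],
                                            x_hi - x_lo, y_hi - y_lo))
nc.close()
```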
- :rtype: bool - """ - st_time = timeit.default_timer() - empty_dict = {} - for inventory in inventory_list: - for emi in inventory.emissions: - if not emi['name'] in empty_dict: - dict_aux = emi.copy() - dict_aux['data'] = None - empty_dict[emi['name']] = dict_aux - - self.variables_attributes = empty_dict.values() - - settings.write_time('Writer', 'set_variable_attributes', timeit.default_timer() - st_time, level=3) - - return True - - def calculate_data_by_var(self, variable, inventory_list, shape): - """ - Calculate the date of the given variable throw the inventory list. - - :param variable: Variable to calculate. - :type variable: str - - :param inventory_list: Inventory list - :type inventory_list: list - - :param shape: Output desired shape. - :type shape: tuple - - :return: Data of the given variable. - :rtype: numpy.array - """ - st_time = timeit.default_timer() - settings.write_log("\t\t\t\tGetting data for '{0}' pollutant.".format(variable), level=3) - - data = None - - for ei in inventory_list: - for emission in ei.emissions: - if emission['name'] == variable: - if emission['data'] is not 0: - vertical_time = timeit.default_timer() - if ei.source_type == 'area': - if ei.vertical_factors is not None: - aux_data = emission['data'][np.newaxis, :, :] * ei.vertical_factors[:, np.newaxis, - np.newaxis] - else: - if len(emission['data'].shape) != 3: - aux_data = np.zeros((shape[1], shape[2], shape[3])) - aux_data[0, :, :] = emission['data'] - else: - aux_data = emission['data'] - elif ei.source_type == 'point': - aux_data = np.zeros((shape[1], shape[2] * shape[3])) - aux_data[ei.location['layer'], ei.location['FID']] = emission['data'] - aux_data = aux_data.reshape((shape[1], shape[2], shape[3])) - else: - aux_data = None - - settings.write_time('VerticalDistribution', 'calculate_data_by_var', - timeit.default_timer() - vertical_time, level=2) - del emission['data'] - - temporal_time = timeit.default_timer() - if data is None: - data = np.zeros(shape) - if ei.temporal_factors is not None: - data += aux_data[np.newaxis, :, :, :] * ei.temporal_factors[:, np.newaxis, :, :] - else: - data += aux_data[np.newaxis, :, :, :] - settings.write_time('TemporalDistribution', 'calculate_data_by_var', - timeit.default_timer() - temporal_time, level=2) - # Unit changes - data = self.unit_change(variable, data) - if data is not None: - data[data < 0] = 0 - settings.write_time('Writer', 'calculate_data_by_var', timeit.default_timer() - st_time, level=3) - return data - - def unit_change(self, variable, data): - """ - Implement on inner class - """ - return np.array([0]) - - @staticmethod - def calculate_displacements(counts): - """ - Calculate the index position of all the ranks. - - :param counts: Number of elements for rank - :type counts: list - - :return: Displacements - :rtype: list - """ - st_time = timeit.default_timer() - - new_list = [0] - accum = 0 - for counter in counts[:-1]: - accum += counter - new_list.append(accum) - - settings.write_time('Writer', 'calculate_displacements', timeit.default_timer() - st_time, level=3) - return new_list - - @staticmethod - def tuple_to_index(tuple_list, bidimensional=False): - """ - Get the index for a list of shapes. - - :param tuple_list: List os shapes. - :type tuple_list: list - - :param bidimensional: Indicates if the tuple is bidimensional. 
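calculate_data_by_var above boils down to two NumPy broadcasts: spread a 2D (y, x) field over the vertical layers with per-layer factors, then over the time steps with per-step, per-cell factors, yielding the (time, lev, y, x) array that gets written out. A toy reproduction of just that arithmetic, with arbitrary array sizes:

```python
import numpy as np

ny, nx, nlev, ntime = 4, 5, 3, 24
emission = np.random.rand(ny, nx)                  # 2D surface emission
vertical_factors = np.array([0.7, 0.2, 0.1])       # one factor per layer
temporal_factors = np.random.rand(ntime, ny, nx)   # one factor per step and cell

# (lev, y, x): distribute the surface field over the vertical layers
data_3d = emission[np.newaxis, :, :] * vertical_factors[:, np.newaxis, np.newaxis]

# (time, lev, y, x): modulate every layer with the hourly factors
data_4d = data_3d[np.newaxis, :, :, :] * temporal_factors[:, np.newaxis, :, :]

assert data_4d.shape == (ntime, nlev, ny, nx)
```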
- :type bidimensional: bool - - :return: List of index - :rtype: list - """ - from operator import mul - st_time = timeit.default_timer() - - new_list = [] - for my_tuple in tuple_list: - if bidimensional: - new_list.append(my_tuple[-1] * my_tuple[-2]) - else: - new_list.append(reduce(mul, my_tuple)) - settings.write_time('Writer', 'tuple_to_index', timeit.default_timer() - st_time, level=3) - return new_list - - @staticmethod - def get_writer(output_model, path, grid, levels, date, hours, global_attributes_path, compress, parallel): - """ - Choose between the different writers depending on the desired output model. - - :param output_model: Name of the output model. Only accepted 'MONARCH, CMAQ or WRF_CHEM. - :type output_model: str - - :param path: Path to the destination file. - :type path: str - - :param grid: Grid of the destination file. - :type grid: Grid - - :param levels: List with the levels of the grid. - :type levels: list - - :param date: Date of the output file - :type date: datetime.datetime - - :param hours: List with the timestamp hours. - :type hours: list. - - :param global_attributes_path: Path to the file that contains the static global attributes. - :type global_attributes_path: str - - :param compress: Indicates if you want to compress the netCDF variable data. - :type compress: bool - - :param parallel: Indicates if you want to write in parallel mode. - :type parallel. bool - - :return: Writing object of the desired output model. - :rtype: Writer - """ - from hermesv3_gr.modules.writing.writer_cmaq import WriterCmaq - from hermesv3_gr.modules.writing.writer_monarch import WriterMonarch - from hermesv3_gr.modules.writing.writer_wrf_chem import WriterWrfChem - - settings.write_log('Selecting writing output type for {0}.'.format(output_model)) - if output_model.lower() == 'monarch': - return WriterMonarch(path, grid, levels, date, hours, global_attributes_path, compress, parallel) - elif output_model.lower() == 'cmaq': - return WriterCmaq(path, grid, levels, date, hours, global_attributes_path, compress, parallel) - elif output_model.lower() == 'wrf_chem': - return WriterWrfChem(path, grid, levels, date, hours, global_attributes_path, compress, parallel) - else: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise AttributeError("The desired '{0}' output model is not available. 
".format(output_model) + - "Only accepted 'MONARCH, CMAQ or WRF_CHEM.") - sys.exit(1) - - @staticmethod - def write_netcdf(netcdf_path, center_latitudes, center_longitudes, data_list, - levels=None, date=None, hours=None, - boundary_latitudes=None, boundary_longitudes=None, cell_area=None, global_attributes=None, - regular_latlon=False, - roated=False, rotated_lats=None, rotated_lons=None, north_pole_lat=None, north_pole_lon=None, - lcc=False, lcc_x=None, lcc_y=None, lat_1_2=None, lon_0=None, lat_0=None, - mercator=False, lat_ts=None): - # TODO Deprecate - """ - Will be deprecated - """ - from netCDF4 import Dataset - from cf_units import Unit, encode_time - - if not (regular_latlon or lcc or roated or mercator): - regular_latlon = True - netcdf = Dataset(netcdf_path, mode='w', format="NETCDF4") - - # ===== Dimensions ===== - if regular_latlon: - var_dim = ('lat', 'lon',) - - # Latitude - if len(center_latitudes.shape) == 1: - netcdf.createDimension('lat', center_latitudes.shape[0]) - lat_dim = ('lat',) - elif len(center_latitudes.shape) == 2: - netcdf.createDimension('lat', center_latitudes.shape[0]) - lat_dim = ('lon', 'lat',) - else: - print 'ERROR: Latitudes must be on a 1D or 2D array instead of {0}'.format(len(center_latitudes.shape)) - sys.exit(1) - - # Longitude - if len(center_longitudes.shape) == 1: - netcdf.createDimension('lon', center_longitudes.shape[0]) - lon_dim = ('lon',) - elif len(center_longitudes.shape) == 2: - netcdf.createDimension('lon', center_longitudes.shape[1]) - lon_dim = ('lon', 'lat',) - else: - print 'ERROR: Longitudes must be on a 1D or 2D array instead of {0}'.format( - len(center_longitudes.shape)) - sys.exit(1) - elif roated: - var_dim = ('rlat', 'rlon',) - - # Rotated Latitude - if rotated_lats is None: - print 'ERROR: For rotated grids is needed the rotated latitudes.' - sys.exit(1) - netcdf.createDimension('rlat', len(rotated_lats)) - lat_dim = ('rlat', 'rlon',) - - # Rotated Longitude - if rotated_lons is None: - print 'ERROR: For rotated grids is needed the rotated longitudes.' - sys.exit(1) - netcdf.createDimension('rlon', len(rotated_lons)) - lon_dim = ('rlat', 'rlon',) - elif lcc or mercator: - var_dim = ('y', 'x',) - - netcdf.createDimension('y', len(lcc_y)) - lat_dim = ('y', 'x',) - - netcdf.createDimension('x', len(lcc_x)) - lon_dim = ('y', 'x',) - else: - lat_dim = None - lon_dim = None - var_dim = None - - # Levels - if levels is not None: - netcdf.createDimension('lev', len(levels)) - - # Bounds - if boundary_latitudes is not None: - # print boundary_latitudes.shape - # print len(boundary_latitudes[0, 0]) - try: - netcdf.createDimension('nv', len(boundary_latitudes[0, 0])) - except TypeError: - netcdf.createDimension('nv', boundary_latitudes.shape[1]) - - # sys.exit() - - # Time - netcdf.createDimension('time', None) - - # ===== Variables ===== - # Time - if date is None: - time = netcdf.createVariable('time', 'd', ('time',), zlib=True) - time.units = "months since 2000-01-01 00:00:00" - time.standard_name = "time" - time.calendar = "gregorian" - time.long_name = "time" - time[:] = [0.] 
- else: - time = netcdf.createVariable('time', 'd', ('time',), zlib=True) - # print u.offset_by_time(encode_time(date.year, date.month, date.day, date.hour, date.minute, date.second)) - # Unit('hour since 1970-01-01 00:00:00.0000000 UTC') - time.units = str(Unit('hours').offset_by_time( - encode_time(date.year, date.month, date.day, date.hour, date.minute, date.second))) - time.standard_name = "time" - time.calendar = "gregorian" - time.long_name = "time" - time[:] = hours - - # Latitude - lats = netcdf.createVariable('lat', 'f', lat_dim, zlib=True) - lats.units = "degrees_north" - lats.axis = "Y" - lats.long_name = "latitude coordinate" - lats.standard_name = "latitude" - lats[:] = center_latitudes - - if boundary_latitudes is not None: - lats.bounds = "lat_bnds" - lat_bnds = netcdf.createVariable('lat_bnds', 'f', lat_dim + ('nv',), zlib=True) - # print lat_bnds[:].shape, boundary_latitudes.shape - lat_bnds[:] = boundary_latitudes - - # Longitude - lons = netcdf.createVariable('lon', 'f', lon_dim, zlib=True) - - lons.units = "degrees_east" - lons.axis = "X" - lons.long_name = "longitude coordinate" - lons.standard_name = "longitude" - # print 'lons:', lons[:].shape, center_longitudes.shape - lons[:] = center_longitudes - if boundary_longitudes is not None: - lons.bounds = "lon_bnds" - lon_bnds = netcdf.createVariable('lon_bnds', 'f', lon_dim + ('nv',), zlib=True) - # print lon_bnds[:].shape, boundary_longitudes.shape - lon_bnds[:] = boundary_longitudes - - if roated: - # Rotated Latitude - rlat = netcdf.createVariable('rlat', 'f', ('rlat',), zlib=True) - rlat.long_name = "latitude in rotated pole grid" - rlat.units = Unit("degrees").symbol - rlat.standard_name = "grid_latitude" - rlat[:] = rotated_lats - - # Rotated Longitude - rlon = netcdf.createVariable('rlon', 'f', ('rlon',), zlib=True) - rlon.long_name = "longitude in rotated pole grid" - rlon.units = Unit("degrees").symbol - rlon.standard_name = "grid_longitude" - rlon[:] = rotated_lons - if lcc or mercator: - x_var = netcdf.createVariable('x', 'd', ('x',), zlib=True) - x_var.units = Unit("km").symbol - x_var.long_name = "x coordinate of projection" - x_var.standard_name = "projection_x_coordinate" - x_var[:] = lcc_x - - y_var = netcdf.createVariable('y', 'd', ('y',), zlib=True) - y_var.units = Unit("km").symbol - y_var.long_name = "y coordinate of projection" - y_var.standard_name = "projection_y_coordinate" - y_var[:] = lcc_y - - cell_area_dim = var_dim - # Levels - if levels is not None: - var_dim = ('lev',) + var_dim - lev = netcdf.createVariable('lev', 'f', ('lev',), zlib=True) - lev.units = Unit("m").symbol - lev.positive = 'up' - lev[:] = levels - - # All variables - if len(data_list) is 0: - var = netcdf.createVariable('aux_var', 'f', ('time',) + var_dim, zlib=True) - var[:] = 0 - for variable in data_list: - # print ('time',) + var_dim - var = netcdf.createVariable(variable['name'], 'f', ('time',) + var_dim, zlib=True) - var.units = Unit(variable['units']).symbol - if 'long_name' in variable: - var.long_name = str(variable['long_name']) - if 'standard_name' in variable: - var.standard_name = str(variable['standard_name']) - if 'cell_method' in variable: - var.cell_method = str(variable['cell_method']) - var.coordinates = "lat lon" - if cell_area is not None: - var.cell_measures = 'area: cell_area' - if regular_latlon: - var.grid_mapping = 'crs' - elif roated: - var.grid_mapping = 'rotated_pole' - elif lcc: - var.grid_mapping = 'Lambert_conformal' - elif mercator: - var.grid_mapping = 'mercator' - try: - var[:] = 
variable['data'] - except ValueError: - print 'VAR ERROR, netcdf shape: {0}, variable shape: {1}'.format(var[:].shape, variable['data'].shape) - - # Grid mapping - if regular_latlon: - # CRS - mapping = netcdf.createVariable('crs', 'i') - mapping.grid_mapping_name = "latitude_longitude" - mapping.semi_major_axis = 6371000.0 - mapping.inverse_flattening = 0 - elif roated: - # Rotated pole - mapping = netcdf.createVariable('rotated_pole', 'c') - mapping.grid_mapping_name = 'rotated_latitude_longitude' - mapping.grid_north_pole_latitude = north_pole_lat - mapping.grid_north_pole_longitude = north_pole_lon - elif lcc: - # CRS - mapping = netcdf.createVariable('Lambert_conformal', 'i') - mapping.grid_mapping_name = "lambert_conformal_conic" - mapping.standard_parallel = lat_1_2 - mapping.longitude_of_central_meridian = lon_0 - mapping.latitude_of_projection_origin = lat_0 - elif mercator: - # Mercator - mapping = netcdf.createVariable('mercator', 'i') - mapping.grid_mapping_name = "mercator" - mapping.longitude_of_projection_origin = lon_0 - mapping.standard_parallel = lat_ts - - # Cell area - if cell_area is not None: - c_area = netcdf.createVariable('cell_area', 'f', cell_area_dim) - c_area.long_name = "area of the grid cell" - c_area.standard_name = "cell_area" - c_area.units = Unit("m2").symbol - # print c_area[:].shape, cell_area.shape - c_area[:] = cell_area - - if global_attributes is not None: - netcdf.setncatts(global_attributes) - - netcdf.close() diff --git a/hermesv3_bu/modules/writing/writer_cmaq.py b/hermesv3_bu/modules/writing/writer_cmaq.py deleted file mode 100644 index 7b3480c780f442559400412192c7367b0a61b0e6..0000000000000000000000000000000000000000 --- a/hermesv3_bu/modules/writing/writer_cmaq.py +++ /dev/null @@ -1,624 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Earth Sciences Department, BSC-CNS -# -# This file is part of HERMESv3_GR. -# -# HERMESv3_GR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# HERMESv3_GR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with HERMESv3_GR. If not, see . - - -import sys -import timeit -import numpy as np -from netCDF4 import Dataset -from mpi4py import MPI -from hermesv3_gr.modules.writing.writer import Writer -from hermesv3_gr.config import settings - - -class WriterCmaq(Writer): - """ - Class to Write the output file for CMAQ Chemical Transport Model CCTM. - - :param path: Path to the destination file. - :type path: str - - :param grid: Grid of the destination file. - :type grid: Grid - - :param levels: List with the levels of the grid. - :type levels: list - - :param date: Date of the output file - :type date: datetime.datetime - - :param hours: List with the timestamp hours. - :type hours: list. - - :param global_attributes_path: Path to the file that contains the static global attributes. - :type global_attributes_path: str - - :param compress: Indicates if you want to compress the netCDF variable data. - :type compress: bool - - :param parallel: Indicates if you want to write in parallel mode. - :type parallel. 
bool - """ - - def __init__(self, path, grid, levels, date, hours, global_attributes_path, compress=True, parallel=False): - super(WriterCmaq, self).__init__(path, grid, levels, date, hours, global_attributes_path, compress, parallel) - - self.global_attributes_order = [ - 'IOAPI_VERSION', 'EXEC_ID', 'FTYPE', 'CDATE', 'CTIME', 'WDATE', 'WTIME', 'SDATE', 'STIME', 'TSTEP', 'NTHIK', - 'NCOLS', 'NROWS', 'NLAYS', 'NVARS', 'GDTYP', 'P_ALP', 'P_BET', 'P_GAM', 'XCENT', 'YCENT', 'XORIG', 'YORIG', - 'XCELL', 'YCELL', 'VGTYP', 'VGTOP', 'VGLVLS', 'GDNAM', 'UPNAM', 'FILEDESC', 'HISTORY', 'VAR-LIST'] - - def unit_change(self, variable, data): - # TODO Documentation - """ - - :param variable: - :param data: - :return: - """ - from cf_units import Unit - - if data is not None: - units = None - for var_name in self.variables_attributes: - if var_name == variable: - units = self.variables_attributes[var_name]['units'] - break - - if Unit(units).symbol == Unit('mol.s-1').symbol: - data = data * 1000 * self.grid.cell_area - elif Unit(units).symbol == Unit('g.s-1').symbol: - data = data * 1000 * self.grid.cell_area - else: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format(units, variable) + - "Should be 'mol.s-1.m-2' or 'kg.s-1.m-2'") - sys.exit(1) - return data - - def change_variable_attributes(self): - """ - Modify the emission list to be consistent to use the output as input for CMAQ model. - - :return: Emission list ready for CMAQ - :rtype: dict - """ - from cf_units import Unit - - new_variable_dict = {} - for variable in self.variables_attributes: - if Unit(variable['units']).symbol == Unit('mol.s-1').symbol: - new_variable_dict[variable['name']] = { - 'units': "{:<16}".format('mole/s'), - 'var_desc': "{:<80}".format(variable['long_name']), - 'long_name': "{:<16}".format(variable['name']), - } - elif Unit(variable['units']).symbol == Unit('g.s-1').symbol: - new_variable_dict[variable['name']] = { - 'units': "{:<16}".format('g/s'), - 'var_desc': "{:<80}".format(variable['long_name']), - 'long_name': "{:<16}".format(variable['name']), - } - else: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError("The unit '{0}' of specie {1} is not ".format(variable['units'], variable['name']) + - "defined correctly. Should be 'mol.s-1' or 'g.s-1'") - sys.exit(1) - - self.variables_attributes = new_variable_dict - - @staticmethod - def create_tflag(st_date, hours_array, num_vars): - """ - Create the content of the CMAQ variable TFLAG - - :param st_date: Starting date - :type st_date: datetime.datetime - - :param hours_array: Array with as elements as time steps. Each element has the delta hours from the starting - date. - :type hours_array: numpy.array - - :param num_vars: Number of variables that will contain the NetCDF. - :type num_vars: int - - :return: Array with the content of TFLAG - :rtype: numpy.array - """ - from datetime import timedelta - - a = np.array([[[]]]) - - for inc_hours in hours_array: - date = st_date + timedelta(hours=inc_hours) - b = np.array([[int(date.strftime('%Y%j'))], [int(date.strftime('%H%M%S'))]] * num_vars) - a = np.append(a, b) - - a.shape = (len(hours_array), 2, num_vars) - return a - - @staticmethod - def str_var_list(var_list): - """ - Transform a list to a string with the elements with 16 white spaces. - - :param var_list: List of variables. 
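create_tflag above encodes every output time step as the (YYYYDDD, HHMMSS) integer pair that the IOAPI TFLAG variable expects, repeated once per output variable. A compact sketch of that encoding, laid out directly in (TSTEP, VAR, DATE-TIME) order; the start date and sizes are illustrative:

```python
from datetime import datetime, timedelta
import numpy as np

def tflag_content(start, hours, num_vars):
    rows = []
    for inc in hours:
        d = start + timedelta(hours=int(inc))
        stamp = [int(d.strftime('%Y%j')), int(d.strftime('%H%M%S'))]
        rows.append([stamp] * num_vars)
    return np.array(rows)                       # shape (len(hours), num_vars, 2)

tflag = tflag_content(datetime(2010, 1, 1), range(49), num_vars=3)
print(tflag[0, 0])                              # [2010001       0]
```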
- :type var_list: list - - :return: List transformed on string. - :rtype: str - """ - str_var_list = "" - for var in var_list: - str_var_list += "{:<16}".format(var) - - return str_var_list - - def read_global_attributes(self): - # TODO Documentation - """ - - :return: - """ - import pandas as pd - from warnings import warn as warning - float_atts = ['VGTOP'] - int_atts = ['FTYPE', 'NTHIK', 'VGTYP'] - str_atts = ['EXEC_ID', 'GDNAM'] - list_float_atts = ['VGLVLS'] - - atts_dict = { - 'EXEC_ID': "{:<80}".format('0.1alpha'), - 'FTYPE': np.int32(1), - 'NTHIK': np.int32(1), - 'VGTYP': np.int32(7), - 'VGTOP': np.float32(5000.), - 'VGLVLS': np.array([1., 0.], dtype=np.float32), - 'GDNAM': "{:<16}".format(''), - } - - if self.global_attributes_path is not None: - df = pd.read_csv(self.global_attributes_path) - - for att in atts_dict.iterkeys(): - try: - if att in int_atts: - atts_dict[att] = np.int32(df.loc[df['attribute'] == att, 'value'].item()) - elif att in float_atts: - atts_dict[att] = np.float32(df.loc[df['attribute'] == att, 'value'].item()) - elif att in str_atts: - atts_dict[att] = str(df.loc[df['attribute'] == att, 'value'].item()) - elif att in list_float_atts: - atts_dict[att] = np.array(df.loc[df['attribute'] == att, 'value'].item().split(), - dtype=np.float32) - except ValueError: - settings.write_log('WARNING: The global attribute {0} is not defined;'.format(att) + - ' Using default value {0}'.format(atts_dict[att])) - if settings.rank == 0: - warning('WARNING: The global attribute {0} is not defined; Using default value {1}'.format( - att, atts_dict[att])) - - else: - settings.write_log('WARNING: Check the .err file to get more information.') - message = 'WARNING: No output attributes defined, check the output_attributes' - message += ' parameter of the configuration file.\nUsing default values:' - for key, value in atts_dict.iteritems(): - message += '\n\t{0} = {1}'.format(key, value) - if settings.rank == 0: - warning(message) - - return atts_dict - - def create_global_attributes(self, var_list): - """ - Create the global attributes and the order that they have to be filled. - - :param var_list: List of variables - :type var_list: list - - :return: Dict of global attributes and a list with the keys ordered. - :rtype: tuple - """ - from datetime import datetime - - global_attributes = self.read_global_attributes() - - if len(self.hours) > 1: - tstep = (self.hours[1] - self.hours[0]) * 10000 - else: - tstep = 1 * 10000 - - now = datetime.now() - global_attributes['IOAPI_VERSION'] = 'None: made only with NetCDF libraries' - global_attributes['CDATE'] = np.int32(now.strftime('%Y%j')) - global_attributes['CTIME'] = np.int32(now.strftime('%H%M%S')) - global_attributes['WDATE'] = np.int32(now.strftime('%Y%j')) - global_attributes['WTIME'] = np.int32(now.strftime('%H%M%S')) - global_attributes['SDATE'] = np.int32(self.date.strftime('%Y%j')) - global_attributes['STIME'] = np.int32(self.date.strftime('%H%M%S')) - global_attributes['TSTEP'] = np.int32(tstep) - global_attributes['NLAYS'] = np.int32(len(self.levels)) - global_attributes['NVARS'] = np.int32(len(var_list)) - global_attributes['UPNAM'] = "{:<16}".format('HERMESv3') - global_attributes['FILEDESC'] = 'Emissions generated by HERMESv3_GR.' - global_attributes['HISTORY'] = \ - 'Code developed by Barcelona Supercomputing Center (BSC, https://www.bsc.es/).' + \ - 'Developer: Carles Tena Medina (carles.tena@bsc.es)' + \ - 'Reference: Guevara et al., 2018, GMD., in preparation.' 
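create_global_attributes above assembles the IOAPI header: SDATE and STIME are the start date rendered as YYYYDDD and HHMMSS integers, and TSTEP is the output step encoded the same way, so an hourly step becomes 10000. A short numeric sketch with illustrative values:

```python
from datetime import datetime
import numpy as np

start = datetime(2010, 1, 1, 0)
hours = list(range(49))                                   # hourly output steps

sdate = np.int32(int(start.strftime('%Y%j')))             # 2010001
stime = np.int32(int(start.strftime('%H%M%S')))           # 0
tstep = np.int32((hours[1] - hours[0]) * 10000) if len(hours) > 1 else np.int32(10000)
print(sdate, stime, tstep)                                # 2010001 0 10000
```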
- global_attributes['VAR-LIST'] = self.str_var_list(var_list) - - if self.grid.grid_type == 'lcc': - global_attributes['GDTYP'] = np.int32(2) - global_attributes['NCOLS'] = np.int32(self.grid.nx) - global_attributes['NROWS'] = np.int32(self.grid.ny) - global_attributes['P_ALP'] = np.float(self.grid.lat_1) - global_attributes['P_BET'] = np.float(self.grid.lat_2) - global_attributes['P_GAM'] = np.float(self.grid.lon_0) - global_attributes['XCENT'] = np.float(self.grid.lon_0) - global_attributes['YCENT'] = np.float(self.grid.lat_0) - global_attributes['XORIG'] = np.float(self.grid.x_0) - np.float(self.grid.inc_x) / 2 - global_attributes['YORIG'] = np.float(self.grid.y_0) - np.float(self.grid.inc_y) / 2 - global_attributes['XCELL'] = np.float(self.grid.inc_x) - global_attributes['YCELL'] = np.float(self.grid.inc_y) - - return global_attributes - - @staticmethod - def create_cmaq_netcdf(netcdf_path, center_latitudes, center_longitudes, data_list, levels=None, date=None, - hours=None, regular_lat_lon=False, rotated=False, nx=None, ny=None, lat_1=None, lat_2=None, - lon_0=None, lat_0=None, x_0=None, y_0=None, inc_x=None, inc_y=None): - # TODO Documentation - """ - - :param netcdf_path: - :param center_latitudes: - :param center_longitudes: - :param data_list: - :param levels: - :param date: - :param hours: - :param regular_lat_lon: - :param rotated: - :param nx: - :param ny: - :param lat_1: - :param lat_2: - :param lon_0: - :param lat_0: - :param x_0: - :param y_0: - :param inc_x: - :param inc_y: - :return: - """ - - data_list, var_list = WriterCmaq.change_variable_attributes(data_list) - - if settings.writing_serial: - WriterCmaq.write_serial_netcdf( - netcdf_path, center_latitudes, center_longitudes, data_list, - levels=levels, date=date, hours=hours, - global_attributes=WriterCmaq.create_global_attributes(date, nx, ny, len(levels), lat_1, lat_2, lon_0, - lat_0, x_0, y_0, inc_x, inc_y, var_list), - regular_lat_lon=regular_lat_lon, - rotated=rotated, ) - else: - WriterCmaq.write_parallel_netcdf( - netcdf_path, center_latitudes, center_longitudes, data_list, - levels=levels, date=date, hours=hours, - global_attributes=WriterCmaq.create_global_attributes(date, nx, ny, len(levels), lat_1, lat_2, lon_0, - lat_0, x_0, y_0, inc_x, inc_y, var_list), - regular_lat_lon=regular_lat_lon, - rotated=rotated, ) - - @staticmethod - def write_netcdf(netcdf_path, center_latitudes, center_longitudes, data_list, levels=None, date=None, hours=None, - global_attributes=None, regular_lat_lon=False, rotated=False): - # TODO Documentation - """ - - :param netcdf_path: - :param center_latitudes: - :param center_longitudes: - :param data_list: - :param levels: - :param date: - :param hours: - :param global_attributes: - :param regular_lat_lon: - :param rotated: - :return: - """ - if regular_lat_lon: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError('ERROR: Regular Lat Lon grid not implemented for CMAQ') - sys.exit(1) - - elif rotated: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError('ERROR: Rotated grid not implemented for CMAQ') - sys.exit(1) - - netcdf = Dataset(netcdf_path, mode='w', format="NETCDF4") - - # ===== Dimensions ===== - netcdf.createDimension('TSTEP', len(hours)) - netcdf.createDimension('DATE-TIME', 2) - netcdf.createDimension('LAY', len(levels)) - netcdf.createDimension('VAR', len(data_list)) - netcdf.createDimension('ROW', center_latitudes.shape[0]) - netcdf.createDimension('COL', 
center_longitudes.shape[1]) - - # ===== Variables ===== - tflag = netcdf.createVariable('TFLAG', 'i', ('TSTEP', 'VAR', 'DATE-TIME',)) - tflag.setncatts({'units': "{:<16}".format(''), 'long_name': "{:<16}".format('TFLAG'), - 'var_desc': "{:<80}".format('Timestep-valid flags: (1) YYYYDDD or (2) HHMMSS')}) - tflag[:] = WriterCmaq.create_tflag(date, hours, len(data_list)) - - # Rest of variables - for variable in data_list: - var = netcdf.createVariable(variable['name'], 'f', ('TSTEP', 'LAY', 'ROW', 'COL',), zlib=True) - var.units = variable['units'] - var.long_name = str(variable['long_name']) - var.var_desc = str(variable['var_desc']) - var[:] = variable['data'] - - # ===== Global attributes ===== - global_attributes, order = global_attributes - for attribute in order: - netcdf.setncattr(attribute, global_attributes[attribute]) - - netcdf.close() - - def create_parallel_netcdf(self): - # TODO Documentation - """ - Create an empty netCDF - """ - st_time = timeit.default_timer() - settings.write_log("\tCreating parallel NetCDF file.", level=2) - # netcdf = Dataset(netcdf_path, mode='w', format="NETCDF4", parallel=True, comm=settings.comm, info=MPI.Info()) - netcdf = Dataset(self.path, mode='w', format="NETCDF4") - - # ===== Dimensions ===== - settings.write_log("\t\tCreating NetCDF dimensions.", level=2) - netcdf.createDimension('TSTEP', len(self.hours)) - # netcdf.createDimension('TSTEP', None) - settings.write_log("\t\t\t'TSTEP' dimension: {0}".format('UNLIMITED ({0})'.format(len(self.hours))), level=3) - - netcdf.createDimension('DATE-TIME', 2) - settings.write_log("\t\t\t'DATE-TIME' dimension: {0}".format(2), level=3) - - netcdf.createDimension('LAY', len(self.levels)) - settings.write_log("\t\t\t'LAY' dimension: {0}".format(len(self.levels)), level=3) - - netcdf.createDimension('VAR', len(self.variables_attributes)) - settings.write_log("\t\t\t'VAR' dimension: {0}".format(len(self.variables_attributes)), level=3) - - netcdf.createDimension('ROW', self.grid.center_latitudes.shape[0]) - settings.write_log("\t\t\t'ROW' dimension: {0}".format(self.grid.center_latitudes.shape[0]), level=3) - - netcdf.createDimension('COL', self.grid.center_longitudes.shape[1]) - settings.write_log("\t\t\t'COL' dimension: {0}".format(self.grid.center_longitudes.shape[1]), level=3) - - # ===== Variables ===== - settings.write_log("\t\tCreating NetCDF variables.", level=2) - tflag = netcdf.createVariable('TFLAG', 'i', ('TSTEP', 'VAR', 'DATE-TIME',)) - tflag.setncatts({'units': "{:<16}".format(''), 'long_name': "{:<16}".format('TFLAG'), - 'var_desc': "{:<80}".format('Timestep-valid flags: (1) YYYYDDD or (2) HHMMSS')}) - tflag[:] = self.create_tflag(self.date, self.hours, len(self.variables_attributes)) - settings.write_log("\t\t\t'TFLAG' variable created with size: {0}".format(tflag[:].shape), level=3) - - index = 0 - # data_list, var_list = self.change_variable_attributes(self.variables_attributes) - for var_name in self.variables_attributes.iterkeys(): - index += 1 - var = netcdf.createVariable(var_name, 'f', ('TSTEP', 'LAY', 'ROW', 'COL',), zlib=self.compress) - var.setncatts(self.variables_attributes[var_name]) - settings.write_log("\t\t\t'{0}' variable created with size: {1}".format(var_name, var[:].shape) + - "\n\t\t\t\t'{0}' variable will be filled later.".format(var_name), level=3) - - # ===== Global attributes ===== - settings.write_log("\t\tCreating NetCDF metadata.", level=2) - global_attributes = self.create_global_attributes(self.variables_attributes.keys()) - for attribute in 
self.global_attributes_order: - netcdf.setncattr(attribute, global_attributes[attribute]) - - netcdf.close() - - settings.write_time('WriterCmaq', 'create_parallel_netcdf', timeit.default_timer() - st_time, level=3) - - return True - - def write_serial_netcdf(self, emission_list): - """ - Write the netCDF in serial mode. - - :param emission_list: List of the processed emissions for the different emission inventories - :type emission_list: list - - :return: True when it finish well. - :rtype: bool - """ - st_time = timeit.default_timer() - - mpi_numpy = False - mpi_vector = True - - # Gathering the index - if mpi_numpy or mpi_vector: - rank_position = np.array([self.grid.x_lower_bound, self.grid.x_upper_bound, self.grid.y_lower_bound, - self.grid.y_upper_bound], dtype='i') - full_position = None - if settings.rank == 0: - full_position = np.empty([settings.size, 4], dtype='i') - settings.comm.Gather(rank_position, full_position, root=0) - - if settings.rank == 0: - netcdf = Dataset(self.path, mode='w', format="NETCDF4") - - # ===== Dimensions ===== - settings.write_log("\tCreating NetCDF file.", level=2) - settings.write_log("\t\tCreating NetCDF dimensions.", level=2) - netcdf.createDimension('TSTEP', len(self.hours)) - settings.write_log("\t\t\t'TSTEP' dimension: {0}".format(len(self.hours)), level=3) - netcdf.createDimension('DATE-TIME', 2) - settings.write_log("\t\t\t'DATE-TIME' dimension: {0}".format(2), level=3) - netcdf.createDimension('LAY', len(self.levels)) - settings.write_log("\t\t\t'LAY' dimension: {0}".format(len(self.levels)), level=3) - netcdf.createDimension('VAR', len(self.variables_attributes)) - settings.write_log("\t\t\t'VAR' dimension: {0}".format(len(self.variables_attributes)), level=3) - netcdf.createDimension('ROW', self.grid.center_latitudes.shape[0]) - settings.write_log("\t\t\t'ROW' dimension: {0}".format(self.grid.center_latitudes.shape[0]), level=3) - netcdf.createDimension('COL', self.grid.center_longitudes.shape[1]) - settings.write_log("\t\t\t'COL' dimension: {0}".format(self.grid.center_longitudes.shape[1]), level=3) - - # ===== Variables ===== - settings.write_log("\t\tCreating NetCDF variables.", level=2) - tflag = netcdf.createVariable('TFLAG', 'i', ('TSTEP', 'VAR', 'DATE-TIME',)) - tflag.setncatts({'units': "{:<16}".format(''), 'long_name': "{:<16}".format('TFLAG'), - 'var_desc': "{:<80}".format('Timestep-valid flags: (1) YYYYDDD or (2) HHMMSS')}) - tflag[:] = self.create_tflag(self.date, self.hours, len(self.variables_attributes)) - settings.write_log("\t\t\t'TFLAG' variable created with size: {0}".format(tflag[:].shape), level=3) - - full_shape = None - index = 0 - # data_list, var_list = self.change_variable_attributes(self.variables_attributes) - for var_name in self.variables_attributes.iterkeys(): - if settings.size != 1: - settings.write_log("\t\t\tGathering {0} data.".format(var_name), level=3) - rank_data = self.calculate_data_by_var(var_name, emission_list, self.grid.shape) - if mpi_numpy or mpi_vector: - if rank_data is not None: - root_shape = settings.comm.bcast(rank_data.shape, root=0) - if full_shape is None: - full_shape = settings.comm.allgather(rank_data.shape) - # print 'Rank {0} full_shape: {1}\n'.format(settings.rank, full_shape) - if mpi_numpy: - if settings.size != 1: - if settings.rank == 0: - recvbuf = np.empty((settings.size,) + rank_data.shape) - else: - recvbuf = None - if root_shape != rank_data.shape: - rank_data_aux = np.empty(root_shape) - rank_data_aux[:, :, :, :-1] = rank_data - rank_data = rank_data_aux - # print 'Rank 
{0} data.shape {1}'.format(settings.rank, rank_data.shape) - settings.comm.Gather(rank_data, recvbuf, root=0) - else: - recvbuf = rank_data - elif mpi_vector: - if rank_data is not None: - counts_i = self.tuple_to_index(full_shape) - rank_buff = [rank_data, counts_i[settings.rank]] - if settings.rank == 0: - displacements = self.calculate_displacements(counts_i) - recvdata = np.empty(sum(counts_i), dtype=settings.precision) - else: - displacements = None - recvdata = None - if settings.precision == np.float32: - recvbuf = [recvdata, counts_i, displacements, MPI.FLOAT] - elif settings.precision == np.float64: - recvbuf = [recvdata, counts_i, displacements, MPI.DOUBLE] - else: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError('ERROR: precision {0} unknown'.format(settings.precision)) - sys.exit(1) - - settings.comm.Gatherv(rank_buff, recvbuf, root=0) - - else: - if settings.size != 1: - data = settings.comm.gather(rank_data, root=0) - else: - data = rank_data - - if settings.rank == 0: - if not (mpi_numpy or mpi_vector): - if settings.size != 1: - try: - data = np.concatenate(data, axis=3) - except (UnboundLocalError, TypeError, IndexError): - data = 0 - st_time = timeit.default_timer() - index += 1 - - var = netcdf.createVariable(var_name, 'f', ('TSTEP', 'LAY', 'ROW', 'COL',), zlib=self.compress) - var.setncatts(self.variables_attributes[var_name]) - # var.units = variable['units'] - # var.long_name = str(variable['long_name']) - # var.var_desc = str(variable['var_desc']) - # var[:] = variable['data'] - - if mpi_numpy: - data = np.ones(var[:].shape, dtype=settings.precision) * 100 - for i in xrange(settings.size): - try: - if i == 0: - var[:, :, :, :full_position[i][3]] = recvbuf[i] - elif i == settings.size - 1: - var[:, :, :, full_position[i][2]:] = recvbuf[i, :, :, :, :-1] - else: - var[:, :, :, full_position[i][2]:full_position[i][3]] = \ - recvbuf[i, :, :, :, : full_shape[i][-1]] - except ValueError: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError("ERROR on i {0} ".format(i) + - "data shape: {0} ".format(data[:, :, :, full_position[i][2]:].shape) + - "recvbuf shape {0}".format(recvbuf[i].shape)) - sys.exit(1) - - elif mpi_vector: - if rank_data is not None: - data = np.empty(var[:].shape, dtype=settings.precision) - for i in xrange(settings.size): - # print 'Resizeing {0}'.format(i) - if not i == settings.size - 1: - data[:, :, full_position[i][0]:full_position[i][1], - full_position[i][2]:full_position[i][3]] = \ - np.array(recvbuf[0][displacements[i]: displacements[i + 1]]).reshape(full_shape[i]) - else: - data[:, :, full_position[i][0]:full_position[i][1], - full_position[i][2]:full_position[i][3]] = \ - np.array(recvbuf[0][displacements[i]:]).reshape(full_shape[i]) - else: - data = 0 - var[:] = data - else: - var[:] = data - settings.write_log("\t\t\t'{0}' variable created with size: {1}".format(var_name, var[:].shape), - level=3) - settings.write_log("\t\tCreating NetCDF metadata.", level=2) - if settings.rank == 0: - # ===== Global attributes ===== - global_attributes = self.create_global_attributes(self.variables_attributes.keys()) - for attribute in self.global_attributes_order: - netcdf.setncattr(attribute, global_attributes[attribute]) - - netcdf.close() - settings.write_time('WriterCmaq', 'write_serial_netcdf', timeit.default_timer() - st_time, level=3) - return True diff --git a/hermesv3_bu/modules/writing/writer_monarch.py 
b/hermesv3_bu/modules/writing/writer_monarch.py deleted file mode 100644 index 3321b06c0f0945da74668f8e8d50d6c6bb503663..0000000000000000000000000000000000000000 --- a/hermesv3_bu/modules/writing/writer_monarch.py +++ /dev/null @@ -1,800 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Earth Sciences Department, BSC-CNS -# -# This file is part of HERMESv3_GR. -# -# HERMESv3_GR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# HERMESv3_GR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with HERMESv3_GR. If not, see . - - -import sys -import timeit -import numpy as np -from netCDF4 import Dataset -from mpi4py import MPI -from hermesv3_gr.modules.writing.writer import Writer -from hermesv3_gr.config import settings - - -class WriterMonarch(Writer): - """ - Class to Write the output file in CF-1.6 conventions. - - :param path: Path to the destination file. - :type path: str - - :param grid: Grid of the destination file. - :type grid: Grid - - :param levels: List with the levels of the grid. - :type levels: list - - :param date: Date of the output file - :type date: datetime.datetime - - :param hours: List with the timestamp hours. - :type hours: list. - - :param global_attributes_path: Path to the file that contains the static global attributes. - :type global_attributes_path: str - - :param compress: Indicates if you want to compress the netCDF variable data. - :type compress: bool - - :param parallel: Indicates if you want to write in parallel mode. - :type parallel. bool - """ - - def __init__(self, path, grid, levels, date, hours, global_attributes_path, compress=True, parallel=False): - super(WriterMonarch, self).__init__(path, grid, levels, date, hours, global_attributes_path, compress, parallel) - - # self.global_attributes = { - # 'nom_attribut': 'value_attribut' - # } - - def unit_change(self, variable, data): - """ - Do the unit conversions of the data. - - :param variable: Variable to convert. - :type variable: dict - - :param data: Data to change. - :type data: numpy.array - - :return: Data with the new units. - :rtype: numpy.array - """ - from cf_units import Unit - st_time = timeit.default_timer() - - if data is not None: - units = None - for var_name in self.variables_attributes: - if var_name == variable: - units = self.variables_attributes[var_name]['units'] - break - - if Unit(units).symbol == Unit('mol.s-1.m-2').symbol: - data = data * 1000 - elif Unit(units).symbol == Unit('kg.s-1.m-2').symbol: - pass - else: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError("The unit '{0}' of specie {1} is not defined correctly. ".format(units, variable) + - "Should be 'mol.s-1.m-2' or 'kg.s-1.m-2'") - sys.exit(1) - settings.write_time('WriterMonarch', 'unit_change', timeit.default_timer() - st_time, level=3) - return data - - def change_variable_attributes(self): - """ - Modify the emission list to be consistent to use the output as input for CMAQ model. 
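WriterMonarch.unit_change above compares canonical cf_units symbols to decide how the data array must be scaled: multiplied by 1000 for 'mol.s-1.m-2' and left untouched for 'kg.s-1.m-2', with anything else rejected. A minimal sketch of that dispatch; the unit strings come from the code above, the data array is illustrative:

```python
import numpy as np
from cf_units import Unit

def scale_for_units(units, data):
    """Return data scaled according to its declared emission units."""
    symbol = Unit(units).symbol
    if symbol == Unit('mol.s-1.m-2').symbol:
        return data * 1000
    if symbol == Unit('kg.s-1.m-2').symbol:
        return data
    raise TypeError("Unsupported unit '{0}'".format(units))

print(scale_for_units('mol.s-1.m-2', np.ones((2, 2)))[0, 0])   # 1000.0
```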
- - :return: Emission list ready for CMAQ - :rtype: dict - """ - new_variable_dict = {} - for variable in self.variables_attributes: - new_variable_dict[variable['name']] = variable - del new_variable_dict[variable['name']]['name'] - - self.variables_attributes = new_variable_dict - - def create_parallel_netcdf(self): - """ - Create an empty netCDF4. - - :return: True at end. - :rtype: bool - """ - from cf_units import Unit, encode_time - - st_time = timeit.default_timer() - - RegularLatLon = False - Rotated = False - LambertConformalConic = False - if self.grid.grid_type == 'global': - RegularLatLon = True - elif self.grid.grid_type == 'rotated': - Rotated = True - elif self.grid.grid_type == 'lcc': - LambertConformalConic = True - - settings.write_log("\tCreating parallel NetCDF file.", level=2) - # netcdf = Dataset(netcdf_path, mode='w', format="NETCDF4", parallel=True, comm=settings.comm, info=MPI.Info()) - netcdf = Dataset(self.path, mode='w', format="NETCDF4") - # print 'NETCDF PATH: {0}'.format(netcdf_path) - - settings.write_log("\t\tCreating NetCDF dimensions.", level=2) - # ===== Dimensions ===== - if RegularLatLon: - var_dim = ('lat', 'lon',) - - # Latitude - if len(self.grid.center_latitudes.shape) == 1: - netcdf.createDimension('lat', self.grid.center_latitudes.shape[0]) - settings.write_log("\t\t\t'lat' dimension: {0}".format(self.grid.center_latitudes.shape[0]), level=3) - lat_dim = ('lat',) - elif len(self.grid.center_latitudes.shape) == 2: - netcdf.createDimension('lat', self.grid.center_latitudes.shape[0]) - settings.write_log("\t\t\t'lat' dimension: {0}".format(self.grid.center_latitudes.shape[0]), level=3) - lat_dim = ('lon', 'lat', ) - else: - print 'ERROR: Latitudes must be on a 1D or 2D array instead of {0}'.format( - len(self.grid.center_latitudes.shape)) - sys.exit(1) - - # Longitude - if len(self.grid.center_longitudes.shape) == 1: - netcdf.createDimension('lon', self.grid.center_longitudes.shape[0]) - settings.write_log("\t\t\t'lon' dimension: {0}".format(self.grid.center_longitudes.shape[0]), level=3) - lon_dim = ('lon',) - elif len(self.grid.center_longitudes.shape) == 2: - netcdf.createDimension('lon', self.grid.center_longitudes.shape[1]) - settings.write_log("\t\t\t'lon' dimension: {0}".format(self.grid.center_longitudes.shape[1]), level=3) - lon_dim = ('lon', 'lat', ) - else: - print 'ERROR: Longitudes must be on a 1D or 2D array instead of {0}'.format( - len(self.grid.center_longitudes.shape)) - sys.exit(1) - elif Rotated: - var_dim = ('rlat', 'rlon',) - - # Rotated Latitude - if self.grid.rlat is None: - print 'ERROR: For rotated grids is needed the rotated latitudes.' - sys.exit(1) - netcdf.createDimension('rlat', len(self.grid.rlat)) - settings.write_log("\t\t\t'rlat' dimension: {0}".format(len(self.grid.rlat)), level=3) - lat_dim = ('rlat', 'rlon',) - - # Rotated Longitude - if self.grid.rlon is None: - print 'ERROR: For rotated grids is needed the rotated longitudes.' 
- sys.exit(1) - netcdf.createDimension('rlon', len(self.grid.rlon)) - settings.write_log("\t\t\t'rlon' dimension: {0}".format(len(self.grid.rlon)), level=3) - lon_dim = ('rlat', 'rlon',) - - elif LambertConformalConic: - var_dim = ('y', 'x',) - - netcdf.createDimension('y', len(self.grid.y)) - settings.write_log("\t\t\t'y' dimension: {0}".format(len(self.grid.y)), level=3) - lat_dim = ('y', 'x', ) - - netcdf.createDimension('x', len(self.grid.x)) - settings.write_log("\t\t\t'x' dimension: {0}".format(len(self.grid.x)), level=3) - lon_dim = ('y', 'x', ) - else: - lat_dim = None - lon_dim = None - var_dim = None - - # Levels - if self.levels is not None: - netcdf.createDimension('lev', len(self.levels)) - settings.write_log("\t\t\t'lev' dimension: {0}".format(len(self.levels)), level=3) - - # Bounds - if self.grid.boundary_latitudes is not None: - # print boundary_latitudes.shape - # print len(boundary_latitudes[0, 0]) - netcdf.createDimension('nv', len(self.grid.boundary_latitudes[0, 0])) - settings.write_log("\t\t\t'nv' dimension: {0}".format(len(self.grid.boundary_latitudes[0, 0])), level=3) - # sys.exit() - - # Time - # netcdf.createDimension('time', None) - netcdf.createDimension('time', len(self.hours)) - settings.write_log("\t\t\t'time' dimension: {0}".format(len(self.hours)), level=3) - - # ===== Variables ===== - settings.write_log("\t\tCreating NetCDF variables.", level=2) - # Time - if self.date is None: - time = netcdf.createVariable('time', 'd', ('time',)) - time.units = "months since 2000-01-01 00:00:00" - time.standard_name = "time" - time.calendar = "gregorian" - time.long_name = "time" - time[:] = [0.] - else: - time = netcdf.createVariable('time', 'd', ('time',)) - time.units = str(Unit('hours').offset_by_time(encode_time(self.date.year, self.date.month, self.date.day, - self.date.hour, self.date.minute, self.date.second))) - time.standard_name = "time" - time.calendar = "gregorian" - time.long_name = "time" - if settings.rank == 0: - time[:] = self.hours - settings.write_log("\t\t\t'time' variable created with size: {0}".format(time[:].shape), level=3) - - # Latitude - lats = netcdf.createVariable('lat', 'f', lat_dim, zlib=self.compress) - lats.units = "degrees_north" - lats.axis = "Y" - lats.long_name = "latitude coordinate" - lats.standard_name = "latitude" - if settings.rank == 0: - lats[:] = self.grid.center_latitudes - settings.write_log("\t\t\t'lat' variable created with size: {0}".format(lats[:].shape), level=3) - - if self.grid.boundary_latitudes is not None: - lats.bounds = "lat_bnds" - lat_bnds = netcdf.createVariable('lat_bnds', 'f', lat_dim + ('nv',), zlib=self.compress) - # print lat_bnds[:].shape, boundary_latitudes.shape - if settings.rank == 0: - lat_bnds[:] = self.grid.boundary_latitudes - settings.write_log("\t\t\t'lat_bnds' variable created with size: {0}".format(lat_bnds[:].shape), level=3) - - # Longitude - lons = netcdf.createVariable('lon', 'f', lon_dim, zlib=self.compress) - lons.units = "degrees_east" - lons.axis = "X" - lons.long_name = "longitude coordinate" - lons.standard_name = "longitude" - if settings.rank == 0: - lons[:] = self.grid.center_longitudes - settings.write_log("\t\t\t'lon' variable created with size: {0}".format(lons[:].shape), level=3) - - if self.grid.boundary_longitudes is not None: - lons.bounds = "lon_bnds" - lon_bnds = netcdf.createVariable('lon_bnds', 'f', lon_dim + ('nv',), zlib=self.compress) - # print lon_bnds[:].shape, boundary_longitudes.shape - if settings.rank == 0: - lon_bnds[:] = self.grid.boundary_longitudes - 
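# A minimal sketch of how the CF-style time units string used above for the
# 'time' variable is built with cf_units (illustrative start date; assumes the
# same cf_units imports as in this module):
from cf_units import Unit, encode_time

time_units = str(Unit('hours').offset_by_time(encode_time(2015, 1, 1, 0, 0, 0)))
# time_units is then written as the units attribute of 'time', i.e. hours
# counted from the simulation start date.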
settings.write_log("\t\t\t'lon_bnds' variable created with size: {0}".format(lon_bnds[:].shape), level=3) - - if Rotated: - # Rotated Latitude - rlat = netcdf.createVariable('rlat', 'f', ('rlat',), zlib=self.compress) - rlat.long_name = "latitude in rotated pole grid" - rlat.units = Unit("degrees").symbol - rlat.standard_name = "grid_latitude" - if settings.rank == 0: - rlat[:] = self.grid.rlat - settings.write_log("\t\t\t'rlat' variable created with size: {0}".format(rlat[:].shape), level=3) - - # Rotated Longitude - rlon = netcdf.createVariable('rlon', 'f', ('rlon',), zlib=self.compress) - rlon.long_name = "longitude in rotated pole grid" - rlon.units = Unit("degrees").symbol - rlon.standard_name = "grid_longitude" - if settings.rank == 0: - rlon[:] = self.grid.rlon - settings.write_log("\t\t\t'rlon' variable created with size: {0}".format(rlon[:].shape), level=3) - if LambertConformalConic: - x_var = netcdf.createVariable('x', 'd', ('x',), zlib=self.compress) - x_var.units = Unit("km").symbol - x_var.long_name = "x coordinate of projection" - x_var.standard_name = "projection_x_coordinate" - if settings.rank == 0: - x_var[:] = self.grid.x - settings.write_log("\t\t\t'x' variable created with size: {0}".format(x_var[:].shape), level=3) - - y_var = netcdf.createVariable('y', 'd', ('y',), zlib=self.compress) - y_var.units = Unit("km").symbol - y_var.long_name = "y coordinate of projection" - y_var.standard_name = "projection_y_coordinate" - if settings.rank == 0: - y_var[:] = self.grid.y - settings.write_log("\t\t\t'y' variable created with size: {0}".format(y_var[:].shape), level=3) - - cell_area_dim = var_dim - # Levels - if self.levels is not None: - var_dim = ('lev',) + var_dim - lev = netcdf.createVariable('lev', 'f', ('lev',), zlib=self.compress) - lev.units = Unit("m").symbol - lev.positive = 'up' - if settings.rank == 0: - lev[:] = self.levels - settings.write_log("\t\t\t'lev' variable created with size: {0}".format(lev[:].shape), level=3) - # print 'DATA LIIIIST {0}'.format(data_list) - # # All variables - if len(self.variables_attributes) is 0: - var = netcdf.createVariable('aux_var', 'f', ('time',) + var_dim, zlib=self.compress) - if settings.rank == 0: - var[:] = 0 - - index = 0 - for var_name, variable in self.variables_attributes.iteritems(): - index += 1 - - var = netcdf.createVariable(var_name, 'f', ('time',) + var_dim, zlib=self.compress) - - var.units = Unit(variable['units']).symbol - if 'long_name' in variable: - var.long_name = str(variable['long_name']) - if 'standard_name' in variable: - var.standard_name = str(variable['standard_name']) - if 'cell_method' in variable: - var.cell_method = str(variable['cell_method']) - var.coordinates = "lat lon" - if self.grid.cell_area is not None: - var.cell_measures = 'area: cell_area' - if RegularLatLon: - var.grid_mapping = 'crs' - elif Rotated: - var.grid_mapping = 'rotated_pole' - elif LambertConformalConic: - var.grid_mapping = 'Lambert_conformal' - settings.write_log("\t\t\t'{0}' variable created with size: {1}".format(var_name, var[:].shape) + - "\n\t\t\t\t'{0}' variable will be filled later.".format(var_name), level=3) - - settings.write_log("\t\tCreating NetCDF metadata.", level=2) - # Grid mapping - if RegularLatLon: - # CRS - mapping = netcdf.createVariable('crs', 'i') - mapping.grid_mapping_name = "latitude_longitude" - mapping.semi_major_axis = 6371000.0 - mapping.inverse_flattening = 0 - elif Rotated: - # Rotated pole - mapping = netcdf.createVariable('rotated_pole', 'c') - mapping.grid_mapping_name = 
'rotated_latitude_longitude' - mapping.grid_north_pole_latitude = self.grid.new_pole_latitude_degrees - mapping.grid_north_pole_longitude = 90 - self.grid.new_pole_longitude_degrees - elif LambertConformalConic: - # CRS - mapping = netcdf.createVariable('Lambert_conformal', 'i') - mapping.grid_mapping_name = "lambert_conformal_conic" - mapping.standard_parallel = "{0}, {1}".format(self.grid.lat_1, self.grid.lat_2) - mapping.longitude_of_central_meridian = self.grid.lon_0 - mapping.latitude_of_projection_origin = self.grid.lat_0 - - # Cell area - if self.grid.cell_area is not None: - c_area = netcdf.createVariable('cell_area', 'f', cell_area_dim) - c_area.long_name = "area of the grid cell" - c_area.standard_name = "cell_area" - c_area.units = Unit("m2").symbol - # print c_area[:].shape, cell_area.shape - # c_area[grid.x_lower_bound:grid.x_upper_bound, grid.y_lower_bound:grid.y_upper_bound] = cell_area - - if self.global_attributes is not None: - netcdf.setncatts(self.global_attributes) - - netcdf.close() - - settings.write_time('WriterMonarch', 'create_parallel_netcdf', timeit.default_timer() - st_time, level=3) - return True - - def write_serial_netcdf(self, emission_list,): - """ - Write the netCDF4 file in serial mode. - - :param emission_list: Data to append. - :type emission_list: list - - :return: True at end. - :rtype: bool - """ - from cf_units import Unit, encode_time - - st_time = timeit.default_timer() - - mpi_numpy = False - mpi_vector = True - - # Gathering the index - if mpi_numpy or mpi_vector: - rank_position = np.array([self.grid.x_lower_bound, self.grid.x_upper_bound, self.grid.y_lower_bound, - self.grid.y_upper_bound], dtype='i') - full_position = None - if settings.rank == 0: - full_position = np.empty([settings.size, 4], dtype='i') - settings.comm.Gather(rank_position, full_position, root=0) - - if settings.rank == 0: - - regular_latlon = False - rotated = False - lcc = False - - if self.grid.grid_type == 'global': - regular_latlon = True - elif self.grid.grid_type == 'rotated': - rotated = True - elif self.grid.grid_type == 'lcc': - lcc = True - settings.write_log("\tCreating NetCDF file.", level=2) - netcdf = Dataset(self.path, mode='w', format="NETCDF4") - - # ===== Dimensions ===== - settings.write_log("\t\tCreating NetCDF dimensions.", level=2) - if regular_latlon: - var_dim = ('lat', 'lon',) - - # Latitude - if len(self.grid.center_latitudes.shape) == 1: - settings.write_log("\t\t\t'lat' dimension: {0}".format(self.grid.center_latitudes.shape[0]), - level=3) - netcdf.createDimension('lat', self.grid.center_latitudes.shape[0]) - lat_dim = ('lat',) - elif len(self.grid.center_latitudes.shape) == 2: - settings.write_log("\t\t\t'lat' dimension: {0}".format(self.grid.center_latitudes.shape[0]), - level=3) - netcdf.createDimension('lat', self.grid.center_latitudes.shape[0]) - lat_dim = ('lon', 'lat', ) - else: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError( - 'ERROR: Latitudes must be on a 1D or 2D array instead of {0} shape.'.format( - len(self.grid.center_latitudes.shape))) - sys.exit(1) - - # Longitude - if len(self.grid.center_longitudes.shape) == 1: - settings.write_log("\t\t\t'lon' dimension: {0}".format(self.grid.center_longitudes.shape[0]), - level=3) - netcdf.createDimension('lon', self.grid.center_longitudes.shape[0]) - lon_dim = ('lon',) - elif len(self.grid.center_longitudes.shape) == 2: - settings.write_log("\t\t\t'lon' dimension: {0}".format(self.grid.center_longitudes.shape[0]), - level=3) 
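# A minimal, self-contained sketch of the MPI Gather / Gatherv pattern this
# serial writer relies on: a fixed-size Gather for the per-rank grid bounds,
# then a Gatherv with per-rank counts and displacements for the flattened
# emission data (toy arrays; names such as local_bounds are illustrative only).
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

# Fixed-size gather: every rank contributes its four domain bounds.
local_bounds = np.array([rank, rank + 1, 0, 10], dtype='i')
all_bounds = np.empty([size, 4], dtype='i') if rank == 0 else None
comm.Gather(local_bounds, all_bounds, root=0)

# Variable-size gather: each rank sends a flattened array of a different length.
local_data = np.full(rank + 1, float(rank), dtype=np.float64)
counts = comm.allgather(local_data.size)
if rank == 0:
    displs = np.zeros(size, dtype='i')
    displs[1:] = np.cumsum(counts[:-1])
    recvbuf = [np.empty(sum(counts), dtype=np.float64), counts, displs, MPI.DOUBLE]
else:
    recvbuf = None
comm.Gatherv([local_data, local_data.size], recvbuf, root=0)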
- netcdf.createDimension('lon', self.grid.center_longitudes.shape[1]) - lon_dim = ('lon', 'lat', ) - else: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError( - 'ERROR: Longitudes must be on a 1D or 2D array instead of {0} shape.'.format( - len(self.grid.center_longitudes.shape))) - sys.exit(1) - elif rotated: - var_dim = ('rlat', 'rlon',) - - # rotated Latitude - if self.grid.rlat is None: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError('ERROR: For rotated grids is needed the rotated latitudes.') - sys.exit(1) - settings.write_log("\t\t'rlat' dimension: {0}".format(len(self.grid.rlat)), level=2) - netcdf.createDimension('rlat', len(self.grid.rlat)) - lat_dim = ('rlat', 'rlon',) - - # rotated Longitude - if self.grid.rlon is None: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError('ERROR: For rotated grids is needed the rotated longitudes.') - sys.exit(1) - settings.write_log("\t\t\t'rlon' dimension: {0}".format(len(self.grid.rlon)), level=3) - netcdf.createDimension('rlon', len(self.grid.rlon)) - lon_dim = ('rlat', 'rlon',) - - elif lcc: - var_dim = ('y', 'x',) - settings.write_log("\t\t\t'y' dimension: {0}".format(len(self.grid.y)), level=3) - netcdf.createDimension('y', len(self.grid.y)) - lat_dim = ('y', 'x', ) - settings.write_log("\t\t\t'x' dimension: {0}".format(len(self.grid.x)), level=3) - netcdf.createDimension('x', len(self.grid.x)) - lon_dim = ('y', 'x', ) - else: - lat_dim = None - lon_dim = None - var_dim = None - - # Levels - if self.levels is not None: - settings.write_log("\t\t\t'lev' dimension: {0}".format(len(self.levels)), level=3) - netcdf.createDimension('lev', len(self.levels)) - - # Bounds - if self.grid.boundary_latitudes is not None: - settings.write_log("\t\t\t'nv' dimension: {0}".format(len(self.grid.boundary_latitudes[0, 0])), level=3) - netcdf.createDimension('nv', len(self.grid.boundary_latitudes[0, 0])) - - # Time - settings.write_log("\t\t\t'time' dimension: {0}".format(len(self.hours)), level=3) - netcdf.createDimension('time', len(self.hours)) - - # ===== Variables ===== - settings.write_log("\t\tCreating NetCDF variables.", level=2) - # Time - if self.date is None: - time = netcdf.createVariable('time', 'd', ('time',)) - time.units = "months since 2000-01-01 00:00:00" - time.standard_name = "time" - time.calendar = "gregorian" - time.long_name = "time" - time[:] = [0.] 
- else: - time = netcdf.createVariable('time', 'd', ('time',)) - time.units = str(Unit('hours').offset_by_time(encode_time( - self.date.year, self.date.month, self.date.day, self.date.hour, self.date.minute, - self.date.second))) - time.standard_name = "time" - time.calendar = "gregorian" - time.long_name = "time" - time[:] = self.hours - settings.write_log("\t\t\t'time' variable created with size: {0}".format(time[:].shape), level=3) - - # Latitude - lats = netcdf.createVariable('lat', 'f', lat_dim, zlib=self.compress) - lats.units = "degrees_north" - lats.axis = "Y" - lats.long_name = "latitude coordinate" - lats.standard_name = "latitude" - lats[:] = self.grid.center_latitudes - settings.write_log("\t\t\t'lat' variable created with size: {0}".format(lats[:].shape), level=3) - - if self.grid.boundary_latitudes is not None: - lats.bounds = "lat_bnds" - lat_bnds = netcdf.createVariable('lat_bnds', 'f', lat_dim + ('nv',), zlib=self.compress) - # print lat_bnds[:].shape, boundary_latitudes.shape - lat_bnds[:] = self.grid.boundary_latitudes - settings.write_log( - "\t\t\t'lat_bnds' variable created with size: {0}".format(lat_bnds[:].shape), level=3) - - # Longitude - lons = netcdf.createVariable('lon', 'f', lon_dim, zlib=self.compress) - lons.units = "degrees_east" - lons.axis = "X" - lons.long_name = "longitude coordinate" - lons.standard_name = "longitude" - lons[:] = self.grid.center_longitudes - settings.write_log("\t\t\t'lon' variable created with size: {0}".format(lons[:].shape), - level=3) - - if self.grid.boundary_longitudes is not None: - lons.bounds = "lon_bnds" - lon_bnds = netcdf.createVariable('lon_bnds', 'f', lon_dim + ('nv',), zlib=self.compress) - # print lon_bnds[:].shape, boundary_longitudes.shape - lon_bnds[:] = self.grid.boundary_longitudes - settings.write_log( - "\t\t\t'lon_bnds' variable created with size: {0}".format(lon_bnds[:].shape), level=3) - - if rotated: - # rotated Latitude - rlat = netcdf.createVariable('rlat', 'f', ('rlat',), zlib=self.compress) - rlat.long_name = "latitude in rotated pole grid" - rlat.units = Unit("degrees").symbol - rlat.standard_name = "grid_latitude" - rlat[:] = self.grid.rlat - settings.write_log("\t\t\t'rlat' variable created with size: {0}".format(rlat[:].shape), level=3) - - # rotated Longitude - rlon = netcdf.createVariable('rlon', 'f', ('rlon',), zlib=self.compress) - rlon.long_name = "longitude in rotated pole grid" - rlon.units = Unit("degrees").symbol - rlon.standard_name = "grid_longitude" - rlon[:] = self.grid.rlon - settings.write_log("\t\t\t'rlon' variable created with size: {0}".format(rlon[:].shape), level=3) - if lcc: - x_var = netcdf.createVariable('x', 'd', ('x',), zlib=self.compress) - x_var.units = Unit("km").symbol - x_var.long_name = "x coordinate of projection" - x_var.standard_name = "projection_x_coordinate" - x_var[:] = self.grid.x - settings.write_log("\t\t\t'x' variable created with size: {0}".format(x_var[:].shape), level=3) - - y_var = netcdf.createVariable('y', 'd', ('y',), zlib=self.compress) - y_var.units = Unit("km").symbol - y_var.long_name = "y coordinate of projection" - y_var.standard_name = "projection_y_coordinate" - y_var[:] = self.grid.y - settings.write_log("\t\t\t'y' variable created with size: {0}".format(y_var[:].shape), level=3) - - cell_area_dim = var_dim - # Levels - if self.levels is not None: - var_dim = ('lev',) + var_dim - lev = netcdf.createVariable('lev', 'f', ('lev',), zlib=self.compress) - lev.units = Unit("m").symbol - lev.positive = 'up' - lev[:] = self.levels - 
settings.write_log("\t\t\t'lev' variable created with size: {0}".format(lev[:].shape), level=3) - - if len(self.variables_attributes) is 0: - var = netcdf.createVariable('aux_var', 'f', ('time',) + var_dim, zlib=self.compress) - var[:] = 0 - - full_shape = None - index = 0 - for var_name in self.variables_attributes.iterkeys(): - if settings.size != 1: - settings.write_log("\t\t\tGathering {0} data.".format(var_name), level=3) - rank_data = self.calculate_data_by_var(var_name, emission_list, self.grid.shape) - if mpi_numpy or mpi_vector: - if rank_data is not None: - root_shape = settings.comm.bcast(rank_data.shape, root=0) - if full_shape is None: - full_shape = settings.comm.allgather(rank_data.shape) - # print 'Rank {0} full_shape: {1}\n'.format(settings.rank, full_shape) - - if mpi_numpy: - if settings.size != 1: - if settings.rank == 0: - recvbuf = np.empty((settings.size,) + rank_data.shape) - else: - recvbuf = None - if root_shape != rank_data.shape: - rank_data_aux = np.empty(root_shape) - rank_data_aux[:, :, :, :-1] = rank_data - rank_data = rank_data_aux - # print 'Rank {0} data.shape {1}'.format(settings.rank, rank_data.shape) - settings.comm.Gather(rank_data, recvbuf, root=0) - else: - recvbuf = rank_data - elif mpi_vector: - if rank_data is not None: - counts_i = self.tuple_to_index(full_shape) - rank_buff = [rank_data, counts_i[settings.rank]] - if settings.rank == 0: - displacements = self.calculate_displacements(counts_i) - recvdata = np.empty(sum(counts_i), dtype=settings.precision) - else: - displacements = None - recvdata = None - if settings.precision == np.float32: - recvbuf = [recvdata, counts_i, displacements, MPI.FLOAT] - elif settings.precision == np.float64: - recvbuf = [recvdata, counts_i, displacements, MPI.DOUBLE] - else: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError('ERROR: precision {0} unknown'.format(settings.precision)) - sys.exit(1) - - settings.comm.Gatherv(rank_buff, recvbuf, root=0) - - else: - if settings.size != 1: - data = settings.comm.gather(rank_data, root=0) - else: - data = rank_data - - if settings.rank == 0: - if not (mpi_numpy or mpi_vector): - if settings.size != 1: - try: - data = np.concatenate(data, axis=3) - except (UnboundLocalError, TypeError, IndexError): - data = 0 - index += 1 - var = netcdf.createVariable(var_name, 'f', ('time',) + var_dim, zlib=self.compress) - - var.units = Unit(self.variables_attributes[var_name]['units']).symbol - - if 'long_name' in self.variables_attributes[var_name]: - var.long_name = str(self.variables_attributes[var_name]['long_name']) - - if 'standard_name' in self.variables_attributes[var_name]: - var.standard_name = str(self.variables_attributes[var_name]['standard_name']) - - if 'cell_method' in self.variables_attributes[var_name]: - var.cell_method = str(self.variables_attributes[var_name]['cell_method']) - - var.coordinates = "lat lon" - - if self.grid.cell_area is not None: - var.cell_measures = 'area: cell_area' - if regular_latlon: - var.grid_mapping = 'crs' - elif rotated: - var.grid_mapping = 'rotated_pole' - elif lcc: - var.grid_mapping = 'Lambert_conformal' - - if mpi_numpy: - data = np.ones(var[:].shape, dtype=settings.precision) * 100 - for i in xrange(settings.size): - try: - if i == 0: - var[:, :, :, :full_position[i][3]] = recvbuf[i] - elif i == settings.size - 1: - var[:, :, :, full_position[i][2]:] = recvbuf[i, :, :, :, :-1] - else: - var[:, :, :, full_position[i][2]:full_position[i][3]] = \ - recvbuf[i, :, :, :, : 
full_shape[i][-1]] - except ValueError: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError("ERROR on i {0} ".format(i) + - "data shape: {0} ".format(data[:, :, :, full_position[i][2]:].shape) + - "recvbuf shape {0}".format(recvbuf[i].shape)) - sys.exit(1) - - elif mpi_vector: - if rank_data is not None: - data = np.empty(var[:].shape, dtype=settings.precision) - for i in xrange(settings.size): - if not i == settings.size - 1: - data[:, :, full_position[i][0]:full_position[i][1], - full_position[i][2]:full_position[i][3]] = \ - np.array(recvbuf[0][displacements[i]: displacements[i + 1]]).reshape(full_shape[i]) - else: - data[:, :, full_position[i][0]:full_position[i][1], - full_position[i][2]:full_position[i][3]] = \ - np.array(recvbuf[0][displacements[i]:]).reshape(full_shape[i]) - else: - data = 0 - var[:] = data - else: - var[:] = data - settings.write_log("\t\t\t'{0}' variable created with size: {1}".format(var_name, var[:].shape), - level=3) - settings.write_log("\t\tCreating NetCDF metadata.", level=2) - if settings.rank == 0: - # Grid mapping - if regular_latlon: - # CRS - mapping = netcdf.createVariable('crs', 'i') - mapping.grid_mapping_name = "latitude_longitude" - mapping.semi_major_axis = 6371000.0 - mapping.inverse_flattening = 0 - elif rotated: - # rotated pole - mapping = netcdf.createVariable('rotated_pole', 'c') - mapping.grid_mapping_name = 'rotated_latitude_longitude' - mapping.grid_north_pole_latitude = 90 - self.grid.new_pole_latitude_degrees - mapping.grid_north_pole_longitude = self.grid.new_pole_longitude_degrees - elif lcc: - # CRS - mapping = netcdf.createVariable('Lambert_conformal', 'i') - mapping.grid_mapping_name = "lambert_conformal_conic" - mapping.standard_parallel = "{0}, {1}".format(self.grid.lat_1, self.grid.lat_2) - mapping.longitude_of_central_meridian = self.grid.lon_0 - mapping.latitude_of_projection_origin = self.grid.lat_0 - - if self.grid.cell_area is not None: - cell_area = settings.comm.gather(self.grid.cell_area, root=0) - if settings.rank == 0: - # Cell area - if self.grid.cell_area is not None: - c_area = netcdf.createVariable('cell_area', 'f', cell_area_dim) - c_area.long_name = "area of the grid cell" - c_area.standard_name = "cell_area" - c_area.units = Unit("m2").symbol - - cell_area = np.concatenate(cell_area, axis=1) - - c_area[:] = cell_area - - if settings.rank == 0: - if self.global_attributes is not None: - netcdf.setncatts(self.global_attributes) - if settings.rank == 0: - netcdf.close() - settings.write_time('WriterMonarch', 'write_serial_netcdf', timeit.default_timer() - st_time, level=3) diff --git a/hermesv3_bu/modules/writing/writer_wrf_chem.py b/hermesv3_bu/modules/writing/writer_wrf_chem.py deleted file mode 100644 index 5cf01cf0dd1cebf6cd8743b1eb1aa4bbd0d7f47d..0000000000000000000000000000000000000000 --- a/hermesv3_bu/modules/writing/writer_wrf_chem.py +++ /dev/null @@ -1,485 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Earth Sciences Department, BSC-CNS -# -# This file is part of HERMESv3_GR. -# -# HERMESv3_GR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# HERMESv3_GR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with HERMESv3_GR. If not, see . - - -import sys -import timeit -import numpy as np -from netCDF4 import Dataset -from mpi4py import MPI -from hermesv3_gr.config import settings -from hermesv3_gr.modules.writing.writer import Writer - - -class WriterWrfChem(Writer): - """ - Class to Write the output file for the WRF-CHEM Chemical Transport Model. - - :param path: Path to the destination file. - :type path: str - - :param grid: Grid of the destination file. - :type grid: Grid - - :param levels: List with the levels of the grid. - :type levels: list - - :param date: Date of the output file - :type date: datetime.datetime - - :param hours: List with the timestamp hours. - :type hours: list. - - :param global_attributes_path: Path to the file that contains the static global attributes. - :type global_attributes_path: str - - :param compress: Indicates if you want to compress the netCDF variable data. - :type compress: bool - - :param parallel: Indicates if you want to write in parallel mode. - :type parallel. bool - """ - - def __init__(self, path, grid, levels, date, hours, global_attributes_path, compress=True, parallel=False): - super(WriterWrfChem, self).__init__(path, grid, levels, date, hours, global_attributes_path, compress, parallel) - - self.global_attributes_order = [ - 'TITLE', 'START_DATE', 'WEST-EAST_GRID_DIMENSION', 'SOUTH-NORTH_GRID_DIMENSION', - 'BOTTOM-TOP_GRID_DIMENSION', 'DX', 'DY', 'GRIDTYPE', 'DIFF_OPT', 'KM_OPT', 'DAMP_OPT', 'DAMPCOEF', 'KHDIF', - 'KVDIF', 'MP_PHYSICS', 'RA_LW_PHYSICS', 'RA_SW_PHYSICS', 'SF_SFCLAY_PHYSICS', 'SF_SURFACE_PHYSICS', - 'BL_PBL_PHYSICS', 'CU_PHYSICS', 'SF_LAKE_PHYSICS', 'SURFACE_INPUT_SOURCE', 'SST_UPDATE', 'GRID_FDDA', - 'GFDDA_INTERVAL_M', 'GFDDA_END_H', 'GRID_SFDDA', 'SGFDDA_INTERVAL_M', 'SGFDDA_END_H', - 'WEST-EAST_PATCH_START_UNSTAG', 'WEST-EAST_PATCH_END_UNSTAG', 'WEST-EAST_PATCH_START_STAG', - 'WEST-EAST_PATCH_END_STAG', 'SOUTH-NORTH_PATCH_START_UNSTAG', 'SOUTH-NORTH_PATCH_END_UNSTAG', - 'SOUTH-NORTH_PATCH_START_STAG', 'SOUTH-NORTH_PATCH_END_STAG', 'BOTTOM-TOP_PATCH_START_UNSTAG', - 'BOTTOM-TOP_PATCH_END_UNSTAG', 'BOTTOM-TOP_PATCH_START_STAG', 'BOTTOM-TOP_PATCH_END_STAG', 'GRID_ID', - 'PARENT_ID', 'I_PARENT_START', 'J_PARENT_START', 'PARENT_GRID_RATIO', 'DT', 'CEN_LAT', 'CEN_LON', - 'TRUELAT1', 'TRUELAT2', 'MOAD_CEN_LAT', 'STAND_LON', 'POLE_LAT', 'POLE_LON', 'GMT', 'JULYR', 'JULDAY', - 'MAP_PROJ', 'MMINLU', 'NUM_LAND_CAT', 'ISWATER', 'ISLAKE', 'ISICE', 'ISURBAN', 'ISOILWATER'] - - def unit_change(self, variable, data): - # TODO Documentation - """ - - :param variable: - :param data: - :return: - """ - from cf_units import Unit - - if data is not None: - units = None - for var_name in self.variables_attributes: - if var_name == variable: - units = self.variables_attributes[var_name]['units'] - break - - if Unit(units).symbol == Unit('mol.h-1.km-2').symbol: - # 10e6 -> from m2 to km2 - # 10e3 -> from kmol to mol - # 3600n -> from s to h - data = data * 10e6 * 10e3 * 3600 - elif Unit(units).symbol == Unit('ug.s-1.m-2').symbol: - # 10e9 -> from kg to ug - data = data * 10e9 - else: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError("The unit '{0}' of specie {1} is not defined correctly.".format(units, variable) + - " Should be 'mol.h-1.km-2' or 'ug.s-1.m-2'") - sys.exit(1) - return data - - def change_variable_attributes(self): - # TODO 
Documentation - """ - - :return: - """ - from cf_units import Unit - - new_variable_dict = {} - for variable in self.variables_attributes: - if Unit(variable['units']).symbol == Unit('mol.h-1.km-2').symbol: - new_variable_dict[variable['name']] = { - 'FieldType': np.int32(104), - 'MemoryOrder': "XYZ", - 'description': "EMISSIONS", - 'units': "mol km^-2 hr^-1", - 'stagger': "", - 'coordinates': "XLONG XLAT" - } - elif Unit(variable['units']).symbol == Unit('ug.s-1.m-2').symbol: - new_variable_dict[variable['name']] = { - 'FieldType': np.int32(104), - 'MemoryOrder': "XYZ", - 'description': "EMISSIONS", - 'units': "ug/m3 m/s", - 'stagger': "", - 'coordinates': "XLONG XLAT" - } - else: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError("The unit '{0}' of specie {1} is not ".format(variable['units'], variable['name']) + - "defined correctly. Should be 'mol.h-1.km-2' or 'ug.s-1.m-2'") - sys.exit(1) - - self.variables_attributes = new_variable_dict - - def read_global_attributes(self): - # TODO Documentation - """ - - :return: - """ - import pandas as pd - from warnings import warn as warning - - float_atts = ['DAMPCOEF', 'KHDIF', 'KVDIF', 'CEN_LAT', 'CEN_LON', 'DT'] - int_atts = ['BOTTOM-TOP_GRID_DIMENSION', 'DIFF_OPT', 'KM_OPT', 'DAMP_OPT', - 'MP_PHYSICS', 'RA_LW_PHYSICS', 'RA_SW_PHYSICS', 'SF_SFCLAY_PHYSICS', 'SF_SURFACE_PHYSICS', - 'BL_PBL_PHYSICS', 'CU_PHYSICS', 'SF_LAKE_PHYSICS', 'SURFACE_INPUT_SOURCE', 'SST_UPDATE', - 'GRID_FDDA', 'GFDDA_INTERVAL_M', 'GFDDA_END_H', 'GRID_SFDDA', 'SGFDDA_INTERVAL_M', 'SGFDDA_END_H', - 'BOTTOM-TOP_PATCH_START_UNSTAG', 'BOTTOM-TOP_PATCH_END_UNSTAG', 'BOTTOM-TOP_PATCH_START_STAG', - 'BOTTOM-TOP_PATCH_END_STAG', 'GRID_ID', 'PARENT_ID', 'I_PARENT_START', 'J_PARENT_START', - 'PARENT_GRID_RATIO', 'NUM_LAND_CAT', 'ISWATER', 'ISLAKE', 'ISICE', 'ISURBAN', 'ISOILWATER', - 'HISTORY'] - str_atts = ['GRIDTYPE', 'MMINLU'] - if self.grid.grid_type == 'lcc': - lat_ts = np.float32(self.grid.lat_0) - elif self.grid.grid_type == 'mercator': - lat_ts = np.float32(self.grid.lat_ts) - - atts_dict = { - 'BOTTOM-TOP_GRID_DIMENSION': np.int32(45), - 'GRIDTYPE': 'C', - 'DIFF_OPT': np.int32(1), - 'KM_OPT': np.int32(4), - 'DAMP_OPT': np.int32(3), - 'DAMPCOEF': np.float32(0.2), - 'KHDIF': np.float32(0.), - 'KVDIF': np.float32(0.), - 'MP_PHYSICS': np.int32(6), - 'RA_LW_PHYSICS': np.int32(4), - 'RA_SW_PHYSICS': np.int32(4), - 'SF_SFCLAY_PHYSICS': np.int32(2), - 'SF_SURFACE_PHYSICS': np.int32(2), - 'BL_PBL_PHYSICS': np.int32(8), - 'CU_PHYSICS': np.int32(0), - 'SF_LAKE_PHYSICS': np.int32(0), - 'SURFACE_INPUT_SOURCE': np.int32(1), - 'SST_UPDATE': np.int32(0), - 'GRID_FDDA': np.int32(0), - 'GFDDA_INTERVAL_M': np.int32(0), - 'GFDDA_END_H': np.int32(0), - 'GRID_SFDDA': np.int32(0), - 'SGFDDA_INTERVAL_M': np.int32(0), - 'SGFDDA_END_H': np.int32(0), - 'BOTTOM-TOP_PATCH_START_UNSTAG': np.int32(1), - 'BOTTOM-TOP_PATCH_END_UNSTAG': np.int32(44), - 'BOTTOM-TOP_PATCH_START_STAG': np.int32(1), - 'BOTTOM-TOP_PATCH_END_STAG': np.int32(45), - 'GRID_ID': np.int32(1), - 'PARENT_ID': np.int32(0), - 'I_PARENT_START': np.int32(1), - 'J_PARENT_START': np.int32(1), - 'PARENT_GRID_RATIO': np.int32(1), - 'DT': np.float32(18.), - 'MMINLU': 'MODIFIED_IGBP_MODIS_NOAH', - 'NUM_LAND_CAT': np.int32(41), - 'ISWATER': np.int32(17), - 'ISLAKE': np.int32(-1), - 'ISICE': np.int32(15), - 'ISURBAN': np.int32(13), - 'ISOILWATER': np.int32(14), - 'CEN_LAT': lat_ts, - 'CEN_LON': np.float32(self.grid.lon_0) - } - - if self.global_attributes_path is not None: - df = 
pd.read_csv(self.global_attributes_path) - - for att in atts_dict.iterkeys(): - try: - if att in int_atts: - atts_dict[att] = np.int32(df.loc[df['attribute'] == att, 'value'].item()) - elif att in float_atts: - atts_dict[att] = np.float32(df.loc[df['attribute'] == att, 'value'].item()) - elif att in str_atts: - atts_dict[att] = str(df.loc[df['attribute'] == att, 'value'].item()) - except ValueError: - print 'A warning has occurred. Check the .err file to get more information.' - if settings.rank == 0: - warning('The global attribute {0} is not defined; Using default value {1}'.format( - att, atts_dict[att])) - - else: - settings.write_log('WARNING: Check the .err file to get more information.') - message = 'WARNING: No output attributes defined, check the output_attributes' - message += ' parameter of the configuration file.\nUsing default values:' - for key, value in atts_dict.iteritems(): - message += '\n\t{0} = {1}'.format(key, value) - if settings.rank == 0: - warning(message) - - return atts_dict - - def create_global_attributes(self): - # TODO Documentation - """ - Create the global attributes that have to be filled. - """ - - global_attributes = self.read_global_attributes() - - global_attributes['TITLE'] = 'Emissions generated by HERMESv3_GR.' - global_attributes['START_DATE'] = self.date.strftime("%Y-%m-%d_%H:%M:%S") - global_attributes['JULYR'] = np.int32(self.date.year) - global_attributes['JULDAY'] = np.int32(self.date.strftime("%j")) - global_attributes['GMT'] = np.float32(self.date.hour) - global_attributes['HISTORY'] = \ - 'Code developed by Barcelona Supercomputing Center (BSC, https://www.bsc.es/). ' + \ - 'Developer: Carles Tena Medina (carles.tena@bsc.es). ' + \ - 'Reference: Guevara et al., 2018, GMD., in preparation.' - - if self.grid.grid_type == 'lcc' or self.grid.grid_type == 'mercator': - global_attributes['WEST-EAST_GRID_DIMENSION'] = np.int32(self.grid.nx + 1) - global_attributes['SOUTH-NORTH_GRID_DIMENSION'] = np.int32(self.grid.ny + 1) - global_attributes['DX'] = np.float32(self.grid.inc_x) - global_attributes['DY'] = np.float32(self.grid.inc_y) - global_attributes['SURFACE_INPUT_SOURCE'] = np.int32(1) - global_attributes['WEST-EAST_PATCH_START_UNSTAG'] = np.int32(1) - global_attributes['WEST-EAST_PATCH_END_UNSTAG'] = np.int32(self.grid.nx) - global_attributes['WEST-EAST_PATCH_START_STAG'] = np.int32(1) - global_attributes['WEST-EAST_PATCH_END_STAG'] = np.int32(self.grid.nx + 1) - global_attributes['SOUTH-NORTH_PATCH_START_UNSTAG'] = np.int32(1) - global_attributes['SOUTH-NORTH_PATCH_END_UNSTAG'] = np.int32(self.grid.ny) - global_attributes['SOUTH-NORTH_PATCH_START_STAG'] = np.int32(1) - global_attributes['SOUTH-NORTH_PATCH_END_STAG'] = np.int32(self.grid.ny + 1) - - global_attributes['POLE_LAT'] = np.float32(90) - global_attributes['POLE_LON'] = np.float32(0) - - if self.grid.grid_type == 'lcc': - global_attributes['MAP_PROJ'] = np.int32(1) - global_attributes['TRUELAT1'] = np.float32(self.grid.lat_1) - global_attributes['TRUELAT2'] = np.float32(self.grid.lat_2) - global_attributes['MOAD_CEN_LAT'] = np.float32(self.grid.lat_0) - global_attributes['STAND_LON'] = np.float32(self.grid.lon_0) - elif self.grid.grid_type == 'mercator': - global_attributes['MAP_PROJ'] = np.int32(3) - global_attributes['TRUELAT1'] = np.float32(self.grid.lat_ts) - global_attributes['TRUELAT2'] = np.float32(0) - global_attributes['MOAD_CEN_LAT'] = np.float32(self.grid.lat_ts) - global_attributes['STAND_LON'] = np.float32(self.grid.lon_0) - - return global_attributes - - def 
create_times_var(self): - # TODO Documentation - """ - - :return: - """ - from datetime import timedelta - import netCDF4 - - aux_times_list = [] - - for hour in self.hours: - aux_date = self.date + timedelta(hours=hour) - aux_times_list.append(aux_date.strftime("%Y-%m-%d_%H:%M:%S")) - - str_out = netCDF4.stringtochar(np.array(aux_times_list)) - return str_out - - def create_parallel_netcdf(self): - # TODO Documentation - """ - - :return: - """ - st_time = timeit.default_timer() - settings.write_log("\tCreating parallel NetCDF file.", level=2) - netcdf = Dataset(self.path, mode='w', format="NETCDF4") - - # ===== Dimensions ===== - settings.write_log("\t\tCreating NetCDF dimensions.", level=2) - netcdf.createDimension('Time', len(self.hours)) - # netcdf.createDimension('Time', None) - settings.write_log("\t\t\t'Time' dimension: {0}".format('UNLIMITED ({0})'.format(len(self.hours))), - level=3) - netcdf.createDimension('DateStrLen', 19) - settings.write_log("\t\t\t'DateStrLen' dimension: 19", level=3) - netcdf.createDimension('west_east', self.grid.center_longitudes.shape[1]) - settings.write_log("\t\t\t'west_east' dimension: {0}".format(len(self.hours)), level=3) - netcdf.createDimension('south_north', self.grid.center_latitudes.shape[0]) - settings.write_log("\t\t\t'south_north' dimension: {0}".format(self.grid.center_latitudes.shape[0]), - level=3) - netcdf.createDimension('emissions_zdim', len(self.levels)) - settings.write_log("\t\t\t'emissions_zdim' dimension: {0}".format(len(self.levels)), level=3) - - # ===== Variables ===== - settings.write_log("\t\tCreating NetCDF variables.", level=2) - times = netcdf.createVariable('Times', 'S1', ('Time', 'DateStrLen', )) - times[:] = self.create_times_var() - settings.write_log("\t\t\t'Times' variable created with size: {0}".format(times[:].shape), level=3) - - index = 0 - for var_name in self.variables_attributes.iterkeys(): - index += 1 - var = netcdf.createVariable(var_name, 'f', ('Time', 'emissions_zdim', 'south_north', 'west_east',), - zlib=self.compress) - var.setncatts(self.variables_attributes[var_name]) - settings.write_log("\t\t\t'{0}' variable created with size: {1}".format(var_name, var[:].shape) + - "\n\t\t\t\t'{0}' variable will be filled later.".format(var_name), level=3) - - # ===== Global attributes ===== - settings.write_log("\t\tCreating NetCDF metadata.", level=2) - global_attributes = self.create_global_attributes() - for attribute in self.global_attributes_order: - netcdf.setncattr(attribute, global_attributes[attribute]) - - netcdf.close() - - settings.write_time('WriterCmaq', 'create_parallel_netcdf', timeit.default_timer() - st_time, level=3) - - return True - - def write_serial_netcdf(self, emission_list): - # TODO Documentation - """ - - :param emission_list: - :return: - """ - st_time = timeit.default_timer() - - # Gathering the index - rank_position = np.array( - [self.grid.x_lower_bound, self.grid.x_upper_bound, self.grid.y_lower_bound, self.grid.y_upper_bound], - dtype='i') - full_position = None - if settings.rank == 0: - full_position = np.empty([settings.size, 4], dtype='i') - settings.comm.Gather(rank_position, full_position, root=0) - - if settings.rank == 0: - settings.write_log("\tCreating NetCDF file.", level=2) - netcdf = Dataset(self.path, mode='w', format="NETCDF4") - - # ===== Dimensions ===== - settings.write_log("\t\tCreating NetCDF dimensions.", level=2) - netcdf.createDimension('Time', None) - settings.write_log("\t\t\t'Time' dimension: UNLIMITED", level=3) - netcdf.createDimension('DateStrLen', 19) 
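# A minimal sketch of the 'Times' encoding used by this writer: the 19-character
# 'DateStrLen' dimension matches the "%Y-%m-%d_%H:%M:%S" strings that
# create_times_var() turns into a character array (illustrative dates only).
from datetime import datetime, timedelta
import numpy as np
import netCDF4

start = datetime(2015, 1, 1)
stamps = [(start + timedelta(hours=h)).strftime("%Y-%m-%d_%H:%M:%S") for h in range(3)]
char_array = netCDF4.stringtochar(np.array(stamps))  # shape (3, 19), dtype 'S1'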
- settings.write_log("\t\t\t'DateStrLen' dimension: 19", level=3) - netcdf.createDimension('west_east', self.grid.center_longitudes.shape[1]) - settings.write_log("\t\t\t'west_east' dimension: {0}".format(len(self.hours)), level=3) - netcdf.createDimension('south_north', self.grid.center_latitudes.shape[0]) - settings.write_log("\t\t\t'south_north' dimension: {0}".format(self.grid.center_latitudes.shape[0]), - level=3) - netcdf.createDimension('emissions_zdim', len(self.levels)) - settings.write_log("\t\t\t'emissions_zdim' dimension: {0}".format(len(self.levels)), level=3) - - # ===== Variables ===== - settings.write_log("\t\tCreating NetCDF variables.", level=2) - times = netcdf.createVariable('Times', 'S1', ('Time', 'DateStrLen', )) - times[:] = self.create_times_var() - settings.write_log("\t\t\t'Times' variable created with size: {0}".format(times[:].shape), level=3) - - full_shape = None - index = 0 - - # self.change_variable_attributes() - - for var_name in self.variables_attributes.iterkeys(): - if settings.size != 1: - settings.write_log("\t\t\tGathering {0} data.".format(var_name), level=3) - rank_data = self.calculate_data_by_var(var_name, emission_list, self.grid.shape) - if rank_data is not None: - # root_shape = settings.comm.bcast(rank_data.shape, root=0) - if full_shape is None: - full_shape = settings.comm.allgather(rank_data.shape) - - counts_i = self.tuple_to_index(full_shape) - rank_buff = [rank_data, counts_i[settings.rank]] - if settings.rank == 0: - displacements = self.calculate_displacements(counts_i) - recvdata = np.empty(sum(counts_i), dtype=settings.precision) - else: - displacements = None - recvdata = None - if settings.precision == np.float32: - recvbuf = [recvdata, counts_i, displacements, MPI.FLOAT] - elif settings.precision == np.float64: - recvbuf = [recvdata, counts_i, displacements, MPI.DOUBLE] - else: - settings.write_log('ERROR: Check the .err file to get more info.') - if settings.rank == 0: - raise TypeError('ERROR: precision {0} unknown'.format(settings.precision)) - sys.exit(1) - - settings.comm.Gatherv(rank_buff, recvbuf, root=0) - - if settings.rank == 0: - if settings.size != 1: - try: - data = np.concatenate(data, axis=3) - except (UnboundLocalError, TypeError, IndexError): - data = 0 - st_time = timeit.default_timer() - index += 1 - - var = netcdf.createVariable(var_name, 'f', ('Time', 'emissions_zdim', 'south_north', 'west_east',), - zlib=self.compress) - var.setncatts(self.variables_attributes[var_name]) - - var_time = timeit.default_timer() - - # data_list = []#np.empty(shape, dtype=np.float64) - - if rank_data is not None: - data = np.empty(var[:].shape, dtype=settings.precision) - for i in xrange(settings.size): - # print 'Resizeing {0}'.format(i) - if not i == settings.size - 1: - data[:, :, full_position[i][0]:full_position[i][1], - full_position[i][2]:full_position[i][3]] = \ - np.array(recvbuf[0][displacements[i]: displacements[i + 1]]).reshape(full_shape[i]) - else: - data[:, :, full_position[i][0]:full_position[i][1], - full_position[i][2]:full_position[i][3]] = \ - np.array(recvbuf[0][displacements[i]:]).reshape(full_shape[i]) - else: - data = 0 - var[:] = data - settings.write_log("\t\t\t'{0}' variable created with size: {1}".format(var_name, var[:].shape), - level=3) - settings.write_log("\t\tCreating NetCDF metadata.", level=2) - if settings.rank == 0: - # ===== Global attributes ===== - global_attributes = self.create_global_attributes() - for attribute in self.global_attributes_order: - netcdf.setncattr(attribute, 
global_attributes[attribute]) - - netcdf.close() - settings.write_time('WriterWrfChem', 'write_serial_netcdf', timeit.default_timer() - st_time, level=3) - return True diff --git a/hermesv3_bu/modules/traffic/__init__.py b/hermesv3_bu/sectors/__init__.py old mode 100644 new mode 100755 similarity index 100% rename from hermesv3_bu/modules/traffic/__init__.py rename to hermesv3_bu/sectors/__init__.py diff --git a/hermesv3_bu/sectors/agricultural_crop_fertilizers_sector.py b/hermesv3_bu/sectors/agricultural_crop_fertilizers_sector.py new file mode 100755 index 0000000000000000000000000000000000000000..dcf79751df6bdd887727377c376a510e68d2e0c0 --- /dev/null +++ b/hermesv3_bu/sectors/agricultural_crop_fertilizers_sector.py @@ -0,0 +1,466 @@ +#!/usr/bin/env python + +import os +import timeit +import pandas as pd +import numpy as np + +from hermesv3_bu.sectors.agricultural_sector import AgriculturalSector +from hermesv3_bu.io_server.io_raster import IoRaster +from hermesv3_bu.io_server.io_shapefile import IoShapefile +from hermesv3_bu.io_server.io_netcdf import IoNetcdf +from hermesv3_bu.logger.log import Log + +formula = True + + +class AgriculturalCropFertilizersSector(AgriculturalSector): + def __init__(self, comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, + vertical_levels, crop_list, nut_shapefile, land_uses_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path, landuse_by_nut, crop_by_nut, crop_from_landuse_path, + cultivated_ratio, fertilizer_rate, crop_f_parameter, crop_f_fertilizers, gridded_ph, gridded_cec, + fertilizer_denominator_yearly_factor_path, crop_calendar, temperature_path, wind_speed_path, + crop_growing_degree_day_path): + spent_time = timeit.default_timer() + logger.write_log('===== AGRICULTURAL CROP FERTILIZERS SECTOR =====') + super(AgriculturalCropFertilizersSector, self).__init__( + comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, nut_shapefile, source_pollutants, + vertical_levels, crop_list, land_uses_path, landuse_by_nut, crop_by_nut, crop_from_landuse_path, None, None, + None, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path) + + self.day_dict = self.calculate_num_days() + + self.cultivated_ratio = self.read_profiles(cultivated_ratio) + self.fertilizer_rate = self.read_profiles(fertilizer_rate) + self.crop_f_parameter = self.read_profiles(crop_f_parameter) + self.crop_f_fertilizers = self.read_profiles(crop_f_fertilizers) + + if self.comm.Get_rank() == 0: + self.logger.write_log('Getting gridded constants', message_level=2) + self.gridded_constants = self.get_gridded_constants( + os.path.join(auxiliary_dir, 'fertilizers', 'gridded_constants.shp'), + gridded_ph, + os.path.join(auxiliary_dir, 'fertilizers', 'gridded_ph.tiff'), + gridded_cec, + os.path.join(auxiliary_dir, 'fertilizers', 'gridded_cec.tiff')) + self.ef_by_crop = self.get_ef_by_crop() + else: + self.logger.write_log('Waiting for master to get the gridded constants', message_level=2) + self.gridded_constants = None + self.ef_by_crop = None + + self.gridded_constants = self.comm.bcast(self.gridded_constants, root=0) + # self.gridded_constants = IoShapefile(self.comm).split_shapefile(self.gridded_constants) + self.gridded_constants = self.gridded_constants.loc[self.crop_distribution.index, :] + self.ef_by_crop = self.comm.bcast(self.ef_by_crop, root=0) + # self.ef_by_crop = IoShapefile(self.comm).split_shapefile(self.ef_by_crop) + self.ef_by_crop = 
self.ef_by_crop.loc[self.crop_distribution.index, :] + + self.fertilizer_denominator_yearly_factor_path = fertilizer_denominator_yearly_factor_path + self.crop_calendar = self.read_profiles(crop_calendar) + + self.temperature_path = temperature_path + self.wind_speed_path = wind_speed_path + self.crop_growing_degree_day_path = crop_growing_degree_day_path + self.logger.write_time_log('AgriculturalCropFertilizersSector', '__init__', timeit.default_timer() - spent_time) + + def get_ftype_fcrop_fmode_by_nut(self, crop, nut_list): + spent_time = timeit.default_timer() + filtered_crop_f_parameter = self.crop_f_parameter.loc[(self.crop_f_parameter['code'].isin(nut_list)) & + (self.crop_f_parameter['crop'] == crop), :].copy() + filtered_crop_f_parameter.rename(columns={'code': 'nut_code'}, inplace=True) + filtered_crop_f_parameter.set_index('nut_code', inplace=True) + + element_list = [] + for i, element in self.crop_f_fertilizers.iterrows(): + element_list.append(element['fertilizer_type']) + filtered_crop_f_parameter[element['fertilizer_type']] *= element['values'] + + f_by_nut = pd.concat([filtered_crop_f_parameter.loc[:, element_list].sum(axis=1), + filtered_crop_f_parameter['f_crop'], + filtered_crop_f_parameter['f_mode']], axis=1).reset_index() + f_by_nut.rename(columns={0: 'f_type'}, inplace=True) + self.logger.write_time_log('AgriculturalCropFertilizersSector', 'get_ftype_fcrop_fmode_by_nut', + timeit.default_timer() - spent_time) + + return f_by_nut + + def get_ef_by_crop(self): + spent_time = timeit.default_timer() + total_crop_df = self.gridded_constants.loc[:, ['geometry', 'nut_code']] + for crop in self.crop_list: + crop_ef = self.gridded_constants.loc[:, ['geometry', 'nut_code']].copy() + # f_ph + if formula: + # After Zhang et al. (2018) + crop_ef['f_ph'] = (0.067 * self.gridded_constants['ph'] ** 2) - \ + (0.69 * self.gridded_constants['ph']) + 0.68 + else: + crop_ef['f_ph'] = 0 + crop_ef.loc[self.gridded_constants['ph'] <= 5.5, 'f_ph'] = -1.072 + crop_ef.loc[(self.gridded_constants['ph'] > 5.5) & (self.gridded_constants['ph'] <= 7.3), 'f_ph'] = \ + -0.933 + crop_ef.loc[(self.gridded_constants['ph'] > 7.3) & (self.gridded_constants['ph'] <= 8.5), 'f_ph'] = \ + -0.608 + crop_ef.loc[self.gridded_constants['ph'] > 8.5, 'f_ph'] = 0 + # f_cec + crop_ef['f_cec'] = 0 + crop_ef.loc[self.gridded_constants['cec'] <= 16, 'f_cec'] = 0.088 + crop_ef.loc[(self.gridded_constants['cec'] > 16) & (self.gridded_constants['cec'] <= 24), 'f_cec'] = 0.012 + crop_ef.loc[(self.gridded_constants['cec'] > 24) & (self.gridded_constants['cec'] <= 32), 'f_cec'] = 0.163 + crop_ef.loc[self.gridded_constants['cec'] > 32, 'f_cec'] = 0 + # f_type + # f_crop + # f_mode + + f_by_nut = self.get_ftype_fcrop_fmode_by_nut(crop, np.unique(crop_ef['nut_code'].values)) + + crop_ef = pd.merge(crop_ef.reset_index(), f_by_nut, how='left', on='nut_code') + crop_ef.set_index('FID', inplace=True) + + crop_ef['f_sum'] = np.exp(crop_ef['f_ph'] + crop_ef['f_cec'] + crop_ef['f_type'] + crop_ef['f_crop'] + + crop_ef['f_mode']) + + total_crop_df['EF_{0}'.format(crop)] = crop_ef['f_sum'] + + self.logger.write_time_log('AgriculturalCropFertilizersSector', 'get_ef_by_crop', + timeit.default_timer() - spent_time) + return total_crop_df + + def to_dst_resolution(self, src_shapefile, value): + spent_time = timeit.default_timer() + + intersection = self.spatial_overlays(src_shapefile.to_crs(self.grid_shp.crs), self.grid_shp.reset_index()) + intersection['area'] = intersection.geometry.area + dst_shapefile = self.grid_shp.reset_index().copy() 
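# A minimal numeric sketch of the emission-factor combination computed in
# get_ef_by_crop() above: the pH term follows the quadratic fit applied when
# formula is True (after Zhang et al., 2018); the remaining correction terms
# are toy values for a hypothetical crop and NUT region.
import numpy as np

ph, f_cec, f_type, f_crop, f_mode = 6.5, 0.012, -0.5, 0.2, 0.0
f_ph = 0.067 * ph ** 2 - 0.69 * ph + 0.68
ef = np.exp(f_ph + f_cec + f_type + f_crop + f_mode)  # single multiplicative EF per cell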
+ dst_shapefile['involved_area'] = intersection.groupby('FID')['area'].sum() + intersection_with_dst_areas = pd.merge(intersection, dst_shapefile.loc[:, ['FID', 'involved_area']], + how='left', on='FID') + intersection_with_dst_areas['involved_area'] = \ + intersection_with_dst_areas['area'] / intersection_with_dst_areas['involved_area'] + + intersection_with_dst_areas[value] = \ + intersection_with_dst_areas[value] * intersection_with_dst_areas['involved_area'] + dst_shapefile[value] = intersection_with_dst_areas.groupby('FID')[value].sum() + dst_shapefile.drop('involved_area', axis=1, inplace=True) + self.logger.write_time_log('AgriculturalCropFertilizersSector', 'to_dst_resolution', + timeit.default_timer() - spent_time) + dst_shapefile.set_index('FID', inplace=True) + + return dst_shapefile + + def to_dst_resolution_parallel(self, src_shapefile, index, value): + spent_time = timeit.default_timer() + + grid_shp = self.grid_shp.loc[index, :].copy() + src_shapefile = self.comm.bcast(src_shapefile, root=0) + src_shapefile = src_shapefile.to_crs(grid_shp.crs) + src_shapefile = src_shapefile[src_shapefile.within(grid_shp.unary_union)] + + intersection = self.spatial_overlays(src_shapefile, grid_shp) + intersection['area'] = intersection.geometry.area + dst_shapefile = grid_shp.copy() + dst_shapefile['involved_area'] = intersection.groupby('FID')['area'].sum() + intersection_with_dst_areas = pd.merge(intersection, dst_shapefile.loc[:, ['FID', 'involved_area']], + how='left', on='FID') + intersection_with_dst_areas['involved_area'] = \ + intersection_with_dst_areas['area'] / intersection_with_dst_areas['involved_area'] + + intersection_with_dst_areas[value] = \ + intersection_with_dst_areas[value] * intersection_with_dst_areas['involved_area'] + dst_shapefile[value] = intersection_with_dst_areas.groupby('FID')[value].sum() + dst_shapefile.drop('involved_area', axis=1, inplace=True) + self.logger.write_time_log('AgriculturalCropFertilizersSector', 'to_dst_resolution_parallel', + timeit.default_timer() - spent_time) + dst_shapefile.set_index('FID', inplace=True) + + return dst_shapefile + + def get_gridded_constants(self, gridded_ph_cec_path, ph_path, clipped_ph_path, cec_path, clipped_cec_path): + spent_time = timeit.default_timer() + if not os.path.exists(gridded_ph_cec_path): + self.logger.write_log('Getting PH from {0}'.format(ph_path), message_level=2) + IoRaster(self.comm).clip_raster_with_shapefile_poly(ph_path, self.clip.shapefile, clipped_ph_path, + nodata=255) + self.logger.write_log('PH clipped done!', message_level=3) + ph_gridded = IoRaster(self.comm).to_shapefile_serie(clipped_ph_path, nodata=255) + self.logger.write_log('PH to shapefile done!', message_level=3) + ph_gridded.rename(columns={'data': 'ph'}, inplace=True) + # To correct input data + ph_gridded['ph'] = ph_gridded['ph'] / 10 + self.logger.write_log('PH to destiny resolution ...', message_level=3) + ph_gridded = self.to_dst_resolution(ph_gridded, value='ph') + self.logger.write_log('PH to destiny resolution done!', message_level=3) + + self.logger.write_log('Getting CEC from {0}'.format(cec_path), message_level=2) + IoRaster(self.comm).clip_raster_with_shapefile_poly(cec_path, self.clip.shapefile, clipped_cec_path, + nodata=-32768) + self.logger.write_log('CEC clipped done!', message_level=3) + cec_gridded = IoRaster(self.comm).to_shapefile_serie(clipped_cec_path, nodata=-32768) + self.logger.write_log('CEC to shapefile done!', message_level=3) + cec_gridded.rename(columns={'data': 'cec'}, inplace=True) + 
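# A minimal numeric sketch of the area-weighted aggregation performed by
# to_dst_resolution() above: each destination cell (FID) takes the source
# values weighted by the intersected areas, normalised by the total
# intersected area of that cell (toy numbers, hypothetical FIDs).
import pandas as pd

pieces = pd.DataFrame({
    'FID':   [0, 0, 1],        # destination cell of each intersection piece
    'value': [7.0, 5.0, 4.0],  # source value carried by the piece
    'area':  [2.0, 6.0, 3.0],  # intersected area of the piece
})
involved = pieces.groupby('FID')['area'].sum()
weights = pieces['area'] / pieces['FID'].map(involved)
dst_value = (pieces['value'] * weights).groupby(pieces['FID']).sum()
# FID 0 -> (7*2 + 5*6) / 8 = 5.5 ; FID 1 -> 4.0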
self.logger.write_log('CEC to destiny resolution ...', message_level=3) + cec_gridded = self.to_dst_resolution(cec_gridded, value='cec') + self.logger.write_log('CEC to destiny resolution done!', message_level=3) + gridded_ph_cec = ph_gridded + gridded_ph_cec['cec'] = cec_gridded['cec'] + + gridded_ph_cec.dropna(inplace=True) + + gridded_ph_cec = self.add_nut_code(gridded_ph_cec, self.nut_shapefile) + gridded_ph_cec.index.name = 'FID' + # gridded_ph_cec.set_index('FID', inplace=True) + + # # Selecting only PH and CEC cells that have also some crop. + # gridded_ph_cec = gridded_ph_cec.loc[self.crop_distribution.index, :] + IoShapefile(self.comm).write_shapefile_serial(gridded_ph_cec.reset_index(), gridded_ph_cec_path) + else: + gridded_ph_cec = IoShapefile(self.comm).read_shapefile_serial(gridded_ph_cec_path) + gridded_ph_cec.set_index('FID', inplace=True) + self.logger.write_time_log('AgriculturalCropFertilizersSector', 'get_gridded_constants', + timeit.default_timer() - spent_time) + return gridded_ph_cec + + def get_gridded_constants_parallel(self, gridded_ph_cec_path, ph_path, clipped_ph_path, cec_path, clipped_cec_path, + index): + spent_time = timeit.default_timer() + if not os.path.exists(gridded_ph_cec_path): + if self.comm.Get_rank() == 0: + self.logger.write_log('Getting PH from {0}'.format(ph_path), message_level=2) + IoRaster(self.comm).clip_raster_with_shapefile_poly(ph_path, self.clip.shapefile, clipped_ph_path, + nodata=255) + self.logger.write_log('PH clipped done!', message_level=3) + ph_gridded = IoRaster(self.comm).to_shapefile_serie(clipped_ph_path, nodata=255) + self.logger.write_log('PH to shapefile done!', message_level=3) + ph_gridded.rename(columns={'data': 'ph'}, inplace=True) + # To correct input data + ph_gridded['ph'] = ph_gridded['ph'] / 10 + else: + ph_gridded = None + + self.logger.write_log('PH to destiny resolution ...', message_level=3) + ph_gridded = self.to_dst_resolution_parallel(ph_gridded, index, value='ph') + self.logger.write_log('PH to destiny resolution done!', message_level=3) + if self.comm.Get_rank() == 0: + self.logger.write_log('Getting CEC from {0}'.format(cec_path), message_level=2) + IoRaster(self.comm).clip_raster_with_shapefile_poly(cec_path, self.clip.shapefile, clipped_cec_path, + nodata=-32768) + self.logger.write_log('CEC clipped done!', message_level=3) + cec_gridded = IoRaster(self.comm).to_shapefile_serie(clipped_cec_path, nodata=-32768) + self.logger.write_log('CEC to shapefile done!', message_level=3) + cec_gridded.rename(columns={'data': 'cec'}, inplace=True) + else: + cec_gridded = None + + self.logger.write_log('CEC to destiny resolution ...', message_level=3) + cec_gridded = self.to_dst_resolution_parallel(cec_gridded, index, value='cec') + self.logger.write_log('CEC to destiny resolution done!', message_level=3) + + gridded_ph_cec = ph_gridded + gridded_ph_cec['cec'] = cec_gridded['cec'] + + gridded_ph_cec.dropna(inplace=True) + + gridded_ph_cec = self.add_nut_code(gridded_ph_cec, self.nut_shapefile) + gridded_ph_cec.index.name = 'FID' + # gridded_ph_cec.set_index('FID', inplace=True) + + # # Selecting only PH and CEC cells that have also some crop. 
+ # gridded_ph_cec = gridded_ph_cec.loc[self.crop_distribution.index, :] + IoShapefile(self.comm).write_shapefile_parallel(gridded_ph_cec.reset_index(), gridded_ph_cec_path) + else: + gridded_ph_cec = IoShapefile(self.comm).read_shapefile_parallel(gridded_ph_cec_path) + gridded_ph_cec.set_index('FID', inplace=True) + self.logger.write_time_log('AgriculturalCropFertilizersSector', 'get_gridded_constants_parallel', + timeit.default_timer() - spent_time) + return gridded_ph_cec + + def get_daily_inputs(self, yearly_emissions): + spent_time = timeit.default_timer() + daily_inputs = {} + geometry_shp = yearly_emissions.loc[:, ['geometry']].reset_index().to_crs({'init': 'epsg:4326'}) + + geometry_shp['c_lat'] = geometry_shp.centroid.y + geometry_shp['c_lon'] = geometry_shp.centroid.x + geometry_shp['centroid'] = geometry_shp.centroid + geometry_shp.drop(columns='geometry', inplace=True) + + for day in self.day_dict.keys(): + aux_df = yearly_emissions.copy().reset_index() + + self.logger.write_log('Getting temperature from {0}'.format( + os.path.join(self.temperature_path, 'tas_{0}{1}.nc'.format(day.year, str(day.month).zfill(2))))) + meteo_df = IoNetcdf(self.comm).get_data_from_netcdf( + os.path.join(self.temperature_path, 'tas_{0}{1}.nc'.format(day.year, str(day.month).zfill(2))), + 'tas', 'daily', day, geometry_shp) + meteo_df['tas'] = meteo_df['tas'] - 273.15 + + self.logger.write_log('Getting surface wind speed from {0}'.format( + os.path.join(self.wind_speed_path, 'sfcWind_{0}{1}.nc'.format(day.year, str(day.month).zfill(2))))) + meteo_df['sfcWind'] = IoNetcdf(self.comm).get_data_from_netcdf( + os.path.join(self.wind_speed_path, 'sfcWind_{0}{1}.nc'.format(day.year, str(day.month).zfill(2))), + 'sfcWind', 'daily', day, geometry_shp).loc[:, 'sfcWind'] + + for crop in self.crop_list: + self.logger.write_log('Getting fertilizer denominator yearly factor from {0}'.format( + self.fertilizer_denominator_yearly_factor_path.replace('', crop).replace( + '', str(day.year)))) + meteo_df['d_{0}'.format(crop)] = IoNetcdf(self.comm).get_data_from_netcdf( + self.fertilizer_denominator_yearly_factor_path.replace('', crop).replace( + '', str(day.year)), 'FD', 'yearly', day, geometry_shp).loc[:, 'FD'] + self.logger.write_log('Getting growing degree day from {0}'.format( + self.crop_growing_degree_day_path.replace('', 'winter').replace('', str(day.year)))) + meteo_df['winter'] = IoNetcdf(self.comm).get_data_from_netcdf( + self.crop_growing_degree_day_path.replace('', 'winter').replace('', str(day.year)), + 'Tsum', 'yearly', day, geometry_shp).loc[:, 'Tsum'].astype(np.int16) + self.logger.write_log('Getting growing degree day from {0}'.format( + self.crop_growing_degree_day_path.replace('', 'spring').replace('', str(day.year)))) + meteo_df['spring'] = IoNetcdf(self.comm).get_data_from_netcdf( + self.crop_growing_degree_day_path.replace('', 'spring').replace('', str(day.year)), + 'Tsum', 'yearly', day, geometry_shp).loc[:, 'Tsum'].astype(np.int16) + + aux_df = aux_df.to_crs({'init': 'epsg:4326'}) + aux_df['centroid'] = aux_df.centroid + + aux_df['REC'] = aux_df.apply(self.nearest, geom_union=meteo_df.unary_union, df1=aux_df, + df2=meteo_df, geom1_col='centroid', src_column='REC', axis=1) + aux_df = pd.merge(aux_df, meteo_df, how='left', on='REC') + + aux_df.drop(columns=['centroid', 'REC', 'geometry_y'], axis=1, inplace=True) + aux_df.rename(columns={'geometry_x': 'geometry'}, inplace=True) + aux_df.set_index('FID', inplace=True) + daily_inputs[day] = aux_df + + 
self.logger.write_time_log('AgriculturalCropFertilizersSector', 'get_daily_inputs', + timeit.default_timer() - spent_time) + return daily_inputs + + def calculate_yearly_emissions(self): + spent_time = timeit.default_timer() + + self.logger.write_log('Calculating yearly emissions') + self.crop_distribution = pd.merge(self.crop_distribution.reset_index(), + self.ef_by_crop.loc[:, ['nut_code']].reset_index(), how='left', on='FID') + + self.crop_distribution.set_index('FID', inplace=True) + # self.ef_by_crop = self.ef_by_crop.loc[self.crop_distribution.index, :] + + for crop in self.crop_list: + self.crop_distribution[crop] = self.crop_distribution.groupby('nut_code')[crop].apply( + lambda x: x.multiply(np.float64(self.cultivated_ratio.loc[0, crop]) * + self.fertilizer_rate.loc[self.fertilizer_rate['code'] == x.name, crop].values[0])) + self.crop_distribution[crop] = self.crop_distribution[crop] * self.ef_by_crop['EF_{0}'.format(crop)] + + self.logger.write_time_log('AgriculturalCropFertilizersSector', 'calculate_yearly_emissions', + timeit.default_timer() - spent_time) + return self.crop_distribution + + def calculate_nh3_emissions(self, day, daily_inputs): + import math + spent_time = timeit.default_timer() + daily_inputs['exp'] = np.exp(daily_inputs['tas'].multiply(0.0223) + daily_inputs['sfcWind'].multiply(0.0419)) + daily_inputs.drop(['tas', 'sfcWind'], axis=1, inplace=True) + + for crop in self.crop_list: + beta_1 = self.crop_calendar.loc[self.crop_calendar['crop'] == crop, 'beta_1'].values[0] + beta_2 = self.crop_calendar.loc[self.crop_calendar['crop'] == crop, 'beta_2'].values[0] + beta_3 = self.crop_calendar.loc[self.crop_calendar['crop'] == crop, 'beta_3'].values[0] + thau_1 = self.crop_calendar.loc[self.crop_calendar['crop'] == crop, 'thau_1'].values[0] + thau_2 = self.crop_calendar.loc[self.crop_calendar['crop'] == crop, 'thau_2'].values[0] + thau_3 = self.crop_calendar.loc[self.crop_calendar['crop'] == crop, 'thau_3'].values[0] + sigma_1 = self.crop_calendar.loc[self.crop_calendar['crop'] == crop, 'sigma_1'].values[0] + sigma_2 = self.crop_calendar.loc[self.crop_calendar['crop'] == crop, 'sigma_2'].values[0] + sigma_3 = self.crop_calendar.loc[self.crop_calendar['crop'] == crop, 'sigma_3'].values[0] + + try: + thau_2 = float(thau_2) + sum = (beta_1 / (sigma_1 * math.sqrt(2 * math.pi))) * math.exp( + (float(int(day.strftime('%j')) - thau_1) ** 2) / (-2 * (sigma_1 ** 2))) + sum += (beta_2 / (sigma_2 * math.sqrt(2 * math.pi))) * math.exp( + (float(int(day.strftime('%j')) - thau_2) ** 2) / (-2 * (sigma_2 ** 2))) + sum += (beta_3 / (sigma_3 * math.sqrt(2 * math.pi))) * math.exp( + (float(int(day.strftime('%j')) - thau_3) ** 2) / (-2 * (sigma_3 ** 2))) + except ValueError: + aux = (beta_1 / (sigma_1 * math.sqrt(2 * math.pi))) * math.exp( + (float(int(day.strftime('%j')) - thau_1) ** 2) / (-2 * (sigma_1 ** 2))) + aux += (beta_3 / (sigma_3 * math.sqrt(2 * math.pi))) * math.exp( + (float(int(day.strftime('%j')) - thau_3) ** 2) / (-2 * (sigma_3 ** 2))) + sum = (beta_2 / (sigma_2 * math.sqrt(2 * math.pi))) * np.exp( + ((int(day.strftime('%j')) - daily_inputs[thau_2]).astype(np.float64)) ** 2 / (-2 * (sigma_2 ** 2))) + sum += aux + daily_inputs['FD_{0}'.format(crop)] = daily_inputs['exp'].multiply(sum) + + for crop in self.crop_list: + daily_inputs[crop] = daily_inputs[crop].multiply( + daily_inputs['FD_{0}'.format(crop)] / daily_inputs['d_{0}'.format(crop)]) + daily_emissions = daily_inputs.loc[:, ['timezone', 'geometry', 'nut_code']].copy() + daily_emissions['nh3'] = daily_inputs.loc[:, 
self.crop_list].sum(axis=1) + # From kg NH3-N to g NH3 + daily_emissions['nh3'] = daily_emissions['nh3'].multiply((17. / 14.) * 1000.) + + self.logger.write_time_log('AgriculturalCropFertilizersSector', 'calculate_nh3_emissions', + timeit.default_timer() - spent_time) + return daily_emissions + + def add_dates(self, df_by_day): + spent_time = timeit.default_timer() + df_list = [] + for tstep, date in enumerate(self.date_array): + + df_aux = df_by_day[date.date()].copy().reset_index() + df_aux['date'] = pd.to_datetime(date, utc=True) + df_aux['date_utc'] = pd.to_datetime(date, utc=True) + df_aux['tstep'] = tstep + # df_aux = self.to_timezone(df_aux) + df_list.append(df_aux) + dataframe_by_day = pd.concat(df_list, ignore_index=True) + dataframe_by_day = self.to_timezone(dataframe_by_day) + dataframe_by_day.set_index(['FID', 'tstep'], inplace=True) + + self.logger.write_time_log('AgriculturalCropFertilizersSector', 'add_dates', + timeit.default_timer() - spent_time) + return dataframe_by_day + + def calculate_daily_emissions(self, emissions): + spent_time = timeit.default_timer() + self.logger.write_log('Calculating daily emissions') + df_by_day = self.get_daily_inputs(emissions) + for day, daily_inputs in df_by_day.iteritems(): + df_by_day[day] = self.calculate_nh3_emissions(day, daily_inputs) + self.logger.write_time_log('AgriculturalCropFertilizersSector', 'calculate_daily_emissions', + timeit.default_timer() - spent_time) + return df_by_day + + def calculate_hourly_emissions(self, emissions): + spent_time = timeit.default_timer() + self.logger.write_log('Calculating hourly emissions') + emissions['hour'] = emissions['date'].dt.hour + emissions['nh3'] = emissions.groupby('hour')['nh3'].apply( + lambda x: x.multiply(self.hourly_profiles.loc['nh3', x.name])) + + emissions['date'] = emissions['date_utc'] + emissions.drop(columns=['hour', 'date_utc'], axis=1, inplace=True) + + self.logger.write_time_log('AgriculturalCropFertilizersSector', 'calculate_hourly_emissions', + timeit.default_timer() - spent_time) + return emissions + + def calculate_emissions(self): + spent_time = timeit.default_timer() + self.logger.write_log('\tCalculating emissions') + + emissions = self.calculate_yearly_emissions() + + df_by_day = self.calculate_daily_emissions(emissions) + emissions = self.add_dates(df_by_day) + emissions = self.calculate_hourly_emissions(emissions) + emissions = self.speciate(emissions) + + emissions.reset_index(inplace=True) + emissions['layer'] = 0 + emissions.set_index(['FID', 'layer', 'tstep'], inplace=True) + + self.logger.write_log('\t\tCrop fertilizers emissions calculated', message_level=2) + self.logger.write_time_log('AgriculturalCropFertilizersSector', 'calculate_emissions', + timeit.default_timer() - spent_time) + return emissions diff --git a/hermesv3_bu/sectors/agricultural_crop_operations_sector.py b/hermesv3_bu/sectors/agricultural_crop_operations_sector.py new file mode 100755 index 0000000000000000000000000000000000000000..70c69fc69082532807a78693ae40d1d7982d1635 --- /dev/null +++ b/hermesv3_bu/sectors/agricultural_crop_operations_sector.py @@ -0,0 +1,256 @@ +#!/usr/bin/env python + +import os +import timeit +import pandas as pd +import numpy as np + +from hermesv3_bu.sectors.agricultural_sector import AgriculturalSector +from hermesv3_bu.io_server.io_shapefile import IoShapefile +from hermesv3_bu.logger.log import Log + + +class AgriculturalCropOperationsSector(AgriculturalSector): + def __init__(self, comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, 
source_pollutants,
+ vertical_levels, crop_list, nut_shapefile_path, land_uses_path, ef_dir, monthly_profiles_path,
+ weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path,
+ molecular_weights_path, landuse_by_nut, crop_by_nut, crop_from_landuse_path):
+ """
+
+ :param auxiliary_dir: Path to the directory where the necessary auxiliary files will be created if they do
+ not exist yet.
+ :type auxiliary_dir: str
+
+ :param grid_shp: Shapefile that contains the destination grid. It must contain the 'FID' (cell number) field.
+ :type grid_shp: GeoPandas.GeoDataframe
+
+ :param clip: Path to the shapefile that contains the region of interest.
+ :type clip: str
+
+ :param date_array: List of datetimes.
+ :type date_array: list(datetime.datetime, ...)
+
+ :param nut_shapefile_path: Path to the shapefile that contains the NUT polygons. The shapefile must contain
+ the 'ORDER06' information with the NUT_code.
+ :type nut_shapefile_path: str
+
+ :param source_pollutants: List of input pollutants to take into account. The crop operations module
+ calculates emissions for the particulate matter source pollutants ['pm10', 'pm25'].
+ :type source_pollutants: list
+
+ :param crop_list: Crop list to take into account for the emission calculation. [barley, oats, rye, wheat]
+ :type crop_list: list
+
+ :param land_uses_path: Path to the shapefile that contains all the land-uses.
+ :type land_uses_path: str
+
+ :param ef_dir: Path to the folder that contains all the CSV files with the information to calculate the
+ emissions. Each pollutant has to be in a separate file (pm10.csv, pm25.csv):
+ Columns: [crop, operation, EF_pm10] and [crop, operation, EF_pm25].
+ :type ef_dir: str
+
+ :param monthly_profiles_path: Path to the CSV file that contains all the monthly profiles. The CSV file must
+ contain the following columns [P_month, January, February, ..., November, December]
+ The P_month code has to be the input pollutant.
+ :type monthly_profiles_path: str
+
+ :param weekly_profiles_path: Path to the CSV file that contains all the weekly profiles. The CSV file must
+ contain the following columns [P_week, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday]
+ The P_week code has to be the input pollutant.
+ :type weekly_profiles_path: str
+
+ :param hourly_profiles_path: Path to the CSV file that contains all the hourly profiles. The CSV file must
+ contain the following columns [P_hour, 0, 1, 2, 3, ..., 22, 23]
+ The P_hour code has to be the input pollutant.
+ :type hourly_profiles_path: str
+
+ :param speciation_map_path: Path to the CSV file that contains the speciation map. The CSV file must contain
+ the following columns [dst, src, description]
+ The 'dst' column will be used as the output pollutant list and the 'src' column as their own input pollutant
+ to be used as a fraction in the speciation profiles.
+ :type speciation_map_path: str
+
+ :param speciation_profiles_path: Path to the file that contains all the speciation profiles. The CSV file
+ must contain the "Code" column with the code of each speciation profile. The rest of the columns
+ have to be the same as the 'dst' column of the 'speciation_map_path' file.
+ :type speciation_profiles_path: str
+
+ :param molecular_weights_path: Path to the CSV file that contains all the molecular weights needed. The CSV
+ file must contain the 'Specie' and 'MW' columns.
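+ An illustrative row of that CSV (example values, not taken from the actual file) would be
+ "nh3,17.03", i.e. one specie per row together with its molecular weight.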
+ :type molecular_weights_path: str + + :param landuse_by_nut: + :param crop_by_nut: + + :param nut_shapefile_path: Path to the shapefile that contain the NUT polygons. The shapefile must contain + the 'ORDER07' information with the NUT_code. + :type nut_shapefile_path: str + """ + spent_time = timeit.default_timer() + logger.write_log('===== AGRICULTURAL CROP OPERATIONS SECTOR =====') + super(AgriculturalCropOperationsSector, self).__init__( + comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, nut_shapefile_path, source_pollutants, + vertical_levels, crop_list, land_uses_path, landuse_by_nut, crop_by_nut, crop_from_landuse_path, ef_dir, + monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path) + + self.months = self.get_date_array_by_month() + + self.logger.write_time_log('AgriculturalCropOperationsSector', '__init__', timeit.default_timer() - spent_time) + + def read_monthly_profiles(self, path): + """ + Read the DataFrame of the monthly profiles with the month number as columns. + + Add 'operation' to index. + + :param path: Path to the file that contains the monthly profiles. + :type path: str + + :return: DataFrame of the monthly profiles. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + profiles = super(AgriculturalCropOperationsSector, self).read_monthly_profiles(path) + + profiles.reset_index(inplace=True) + profiles.set_index(['P_month', 'operation'], inplace=True) + + self.logger.write_time_log('AgriculturalCropOperationsSector', 'read_monthly_profiles', + timeit.default_timer() - spent_time) + return profiles + + def get_date_array_by_month(self): + spent_time = timeit.default_timer() + + month_array = [hour.date().month for hour in self.date_array] + month_list, num_days = np.unique(month_array, return_counts=True) + + month_dict = {} + for month in month_list: + month_dict[month] = np.array(self.date_array)[month_array == month] + + self.logger.write_time_log('AgriculturalCropOperationsSector', 'get_date_array_by_month', + timeit.default_timer() - spent_time) + + return month_dict + + def calculate_distribution_by_month(self, month): + """ + EF units = kg/ha + :param month: + :return: + """ + spent_time = timeit.default_timer() + + month_distribution = self.crop_distribution.loc[:, ['timezone', 'geometry']].copy() + for pollutant in self.source_pollutants: + month_distribution[pollutant] = 0 + + emission_factors = pd.read_csv(os.path.join(self.ef_files_dir, '{0}.csv'.format(pollutant))) + emission_factors.set_index(['crop', 'operation'], inplace=True) + for crop in self.crop_list: + ef_c = emission_factors.loc[(crop, 'soil_cultivation'), 'EF_{0}'.format(pollutant)] + ef_h = emission_factors.loc[(crop, 'harvesting'), 'EF_{0}'.format(pollutant)] + m_c = self.monthly_profiles.loc[(crop, 'soil_cultivation'), month] + m_h = self.monthly_profiles.loc[(crop, 'harvesting'), month] + factor = ef_c * m_c + ef_h * m_h + # From Kg to g + factor *= 1000.0 + month_distribution[pollutant] += self.crop_distribution[crop].multiply(factor) + self.logger.write_time_log('AgriculturalCropOperationsSector', 'calculate_distribution_by_month', + timeit.default_timer() - spent_time) + + return month_distribution + + def add_dates(self, df_by_month): + spent_time = timeit.default_timer() + df_list = [] + for tstep, date in enumerate(self.date_array): + df_aux = df_by_month[date.date().month].copy().reset_index() + df_aux['date'] = pd.to_datetime(date, utc=True) + df_aux['date_utc'] = 
pd.to_datetime(date, utc=True) + df_aux['tstep'] = tstep + # df_aux = self.to_timezone(df_aux) + df_list.append(df_aux) + dataframe_by_day = pd.concat(df_list, ignore_index=True) + dataframe_by_day.set_index(['FID', 'tstep'], inplace=True) + dataframe_by_day = self.to_timezone(dataframe_by_day) + + self.logger.write_time_log('AgriculturalCropOperationsSector', 'add_dates', timeit.default_timer() - spent_time) + + return dataframe_by_day + + def calculate_hourly_emissions(self): + spent_time = timeit.default_timer() + + def get_wf(df): + """ + Get the Weekly Factor for the given dataframe depending on the date. + + :param df: DataFrame where find the weekly factor. df.name is the date. + :type df: DataFrame + + :return: DataFrame with only the WF column. + :rtype: DataFrame + """ + weekly_profile = self.calculate_rebalanced_weekly_profile(self.weekly_profiles.loc[pollutant, :].to_dict(), + df.name) + df['WF'] = weekly_profile[df.name.weekday()] + return df.loc[:, ['WF']] + + def get_hf(df): + """ + Get the Hourly Factor for the given dataframe depending on the hour. + + :param df: DataFrame where find the hourly factor. df.name is the hour. + :type df: DataFrame + + :return: DataFrame with only the HF column. + :rtype: DataFrame + """ + hourly_profile = self.hourly_profiles.loc[pollutant, :].to_dict() + hour_factor = hourly_profile[df.name] + + df['HF'] = hour_factor + return df.loc[:, ['HF']] + + self.crop_distribution['date_as_date'] = self.crop_distribution['date'].dt.date + self.crop_distribution['month'] = self.crop_distribution['date'].dt.weekday + self.crop_distribution['weekday'] = self.crop_distribution['date'].dt.weekday + self.crop_distribution['hour'] = self.crop_distribution['date'].dt.hour + + for pollutant in self.source_pollutants: + self.crop_distribution['WF'] = self.crop_distribution.groupby(['date_as_date']).apply(get_wf) + + self.crop_distribution['HF'] = self.crop_distribution.groupby('hour').apply(get_hf) + self.crop_distribution[pollutant] = self.crop_distribution[pollutant].multiply( + self.crop_distribution['HF'] * self.crop_distribution['WF'], axis=0) + + self.crop_distribution.drop(columns=['month', 'weekday', 'hour', 'WF', 'HF', 'date_as_date'], inplace=True) + + self.logger.write_time_log('AgriculturalCropOperationsSector', 'calculate_hourly_emissions', + timeit.default_timer() - spent_time) + + return self.crop_distribution + + def calculate_emissions(self): + spent_time = timeit.default_timer() + self.logger.write_log('\tCalculating emissions') + + distribution_by_month = {} + for month in self.months.iterkeys(): + distribution_by_month[month] = self.calculate_distribution_by_month(month) + + self.crop_distribution = self.add_dates(distribution_by_month) + self.crop_distribution.drop('date_utc', axis=1, inplace=True) + self.crop_distribution = self.calculate_hourly_emissions() + self.crop_distribution = self.speciate(self.crop_distribution) + + self.crop_distribution['layer'] = 0 + + self.logger.write_log('\t\tCrop operations emissions calculated', message_level=2) + self.logger.write_time_log('AgriculturalCropOperationsSector', 'calculate_emissions', + timeit.default_timer() - spent_time) + return self.crop_distribution diff --git a/hermesv3_bu/sectors/agricultural_machinery_sector.py b/hermesv3_bu/sectors/agricultural_machinery_sector.py new file mode 100755 index 0000000000000000000000000000000000000000..ffd297769867da91ec08178c7c94175859c315aa --- /dev/null +++ b/hermesv3_bu/sectors/agricultural_machinery_sector.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python 
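+
+# Machinery emissions are estimated per NUT and per vehicle/technology combination as
+# E = N * S * T * P * LF * DF * EF, where N are the vehicle units, S the technology share,
+# T the working hours, P the power, LF the load factor, DF the deterioration factor and
+# EF the emission factor (see calcualte_yearly_emissions_by_nut_vehicle below). The yearly
+# totals are then modulated with monthly, weekly and hourly profiles and distributed in
+# space with the crop-based fraction of each NUT.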
+ +import sys +import os +import timeit +from warnings import warn + +import geopandas as gpd +import pandas as pd +import numpy as np + +from hermesv3_bu.sectors.agricultural_sector import AgriculturalSector +from hermesv3_bu.io_server.io_shapefile import IoShapefile +from hermesv3_bu.logger.log import Log + + +class AgriculturalMachinerySector(AgriculturalSector): + def __init__(self, comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, + vertical_levels, crop_list, nut_shapefile, machinery_list, land_uses_path, ef_files_dir, + monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path, landuse_by_nut, crop_by_nut, crop_from_landuse_path, + machinery_distibution_nut_shapefile_path, deterioration_factor_path, load_factor_path, + vehicle_ratio_path, vehicle_units_path, vehicle_workhours_path, vehicle_power_path, + crop_machinery_by_nut): + spent_time = timeit.default_timer() + + logger.write_log('===== AGRICULTURAL MACHINERY SECTOR =====') + super(AgriculturalMachinerySector, self).__init__( + comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, nut_shapefile, source_pollutants, + vertical_levels, crop_list, land_uses_path, landuse_by_nut, crop_by_nut, crop_from_landuse_path, + ef_files_dir, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path) + + self.machinery_list = machinery_list + self.crop_machinery_by_nut = self.read_profiles(crop_machinery_by_nut) + + self.crop_distribution = self.get_crop_distribution_by_nut( + self.crop_distribution, machinery_distibution_nut_shapefile_path, nut_code='ORDER07') + + self.months = self.get_date_array_by_month() + + self.deterioration_factor = self.read_profiles(deterioration_factor_path) + self.load_factor = self.read_profiles(load_factor_path) + self.vehicle_ratio = self.read_profiles(vehicle_ratio_path) + self.vehicle_units = self.read_profiles(vehicle_units_path) + self.vehicle_workhours = self.read_profiles(vehicle_workhours_path) + self.vehicle_power = self.read_profiles(vehicle_power_path) + self.emission_factors = self.read_profiles(ef_files_dir) + + self.logger.write_time_log('AgriculturalMachinerySector', '__init__', timeit.default_timer() - spent_time) + + def get_crop_distribution_by_nut(self, crop_distribution, nut_shapefile, nut_code=None, write_crop_by_nut=False): + spent_time = timeit.default_timer() + + def get_fraction(dataframe): + total_crop_sum = self.crop_machinery_by_nut.loc[self.crop_machinery_by_nut[nut_code] == int(dataframe.name), + self.crop_list].values.sum() + dataframe['fraction'] = dataframe[self.crop_list].sum(axis=1) / total_crop_sum + + return dataframe.loc[:, ['fraction']] + + crop_distribution.reset_index(inplace=True) + + crop_distribution_nut_path = os.path.join(self.auxiliary_dir, 'crops', 'crops_nut.shp') + if not os.path.exists(crop_distribution_nut_path): + nut_shapefile = gpd.read_file(nut_shapefile) + if nut_code is not None: + nut_shapefile = nut_shapefile.loc[:, [nut_code, 'geometry']] + + nut_shapefile = nut_shapefile.to_crs(crop_distribution.crs) + crop_distribution['src_inter_fraction'] = crop_distribution.geometry.area + + crop_distribution = self.spatial_overlays(crop_distribution, nut_shapefile, how='intersection') + crop_distribution['src_inter_fraction'] = \ + crop_distribution.geometry.area / crop_distribution['src_inter_fraction'] + + crop_distribution[self.crop_list] = \ + 
crop_distribution.loc[:, self.crop_list].multiply(crop_distribution["src_inter_fraction"], axis="index") + + crop_distribution.drop(columns=['src_inter_fraction', 'idx1', 'idx2'], inplace=True) + + if write_crop_by_nut: + crop_distribution.loc[:, self.crop_list + [nut_code]].groupby(nut_code).sum().reset_index().to_csv( + self.crop_machinery_by_nut) + crop_distribution['fraction'] = crop_distribution.groupby(nut_code).apply(get_fraction) + crop_distribution.drop(columns=self.crop_list, inplace=True) + crop_distribution.rename(columns={nut_code: 'NUT_code'}, inplace=True) + + IoShapefile(self.comm).write_shapefile_serial(crop_distribution, crop_distribution_nut_path) + else: + crop_distribution = IoShapefile(self.comm).read_shapefile(crop_distribution_nut_path) + + self.logger.write_time_log('AgriculturalMachinerySector', 'get_crop_distribution_by_nut', + timeit.default_timer() - spent_time) + + return crop_distribution + + def get_date_array_by_month(self): + spent_time = timeit.default_timer() + month_array = [hour.date().month for hour in self.date_array] + month_list, num_days = np.unique(month_array, return_counts=True) + + month_dict = {} + for month in month_list: + month_dict[month] = np.array(self.date_array)[month_array == month] + + self.logger.write_time_log('AgriculturalMachinerySector', 'get_date_array_by_month', + timeit.default_timer() - spent_time) + return month_dict + + def calcualte_yearly_emissions_by_nut_vehicle(self): + spent_time = timeit.default_timer() + + def get_n(df): + df['N'] = self.vehicle_units.loc[self.vehicle_units['code'] == df.name[0], df.name[1]].values[0] + return df.loc[:, ['N']] + + def get_s(df): + df['S'] = self.vehicle_ratio.loc[ + (self.vehicle_ratio['code'] == df.name[0]) & (self.vehicle_ratio['technology'] == df.name[2]), + df.name[1]].values[0] + return df.loc[:, ['S']] + + def get_t(df): + + try: + df['T'] = self.vehicle_workhours.loc[(self.vehicle_workhours['code'] == df.name[0]) & + (self.vehicle_workhours['technology'] == df.name[2]), + df.name[1]].values[0] + except IndexError: + df['T'] = np.nan + df.loc[df['T'].isna(), 'T'] = self.vehicle_workhours.loc[ + (self.vehicle_workhours['code'] == df.name[0]) & (self.vehicle_workhours['technology'] == 'default'), + df.name[1]].values[0] + return df.loc[:, ['T']] + + def get_p(df): + df['P'] = self.vehicle_power.loc[self.vehicle_power['code'] == df.name[0], df.name[1]].values[0] + return df.loc[:, ['P']] + + def get_lf(df): + df['LF'] = self.load_factor.loc[self.load_factor['vehicle'] == df.name, 'LF'].values[0] + return df.loc[:, ['LF']] + + def get_df(df): + try: + df['DF_{0}'.format(in_p)] = 1 + self.deterioration_factor.loc[ + (self.deterioration_factor['vehicle'] == df.name[0]) & ( + self.deterioration_factor['technology'] == df.name[1]), 'DF_{0}'.format(in_p)].values[0] + except (KeyError, IndexError): + df['DF_{0}'.format(in_p)] = 1 + return df.loc[:, ['DF_{0}'.format(in_p)]] + + def get_ef(df): + emission_factors = self.emission_factors.loc[(self.emission_factors['vehicle'] == df.name[0]) & + (self.emission_factors['technology'] == df.name[1]), + ['power_min', 'power_max', 'EF_{0}'.format(in_p)]] + df['EF_{0}'.format(in_p)] = None + for i, emission_factor in emission_factors.iterrows(): + if np.isnan(emission_factor['power_min']) and not np.isnan(emission_factor['power_max']): + df.loc[df['P'] < emission_factor['power_max'], 'EF_{0}'.format(in_p)] = emission_factor[ + 'EF_{0}'.format(in_p)] + elif not np.isnan(emission_factor['power_min']) and not np.isnan(emission_factor['power_max']): 
+ df.loc[(df['P'] >= emission_factor['power_min']) & (df['P'] < emission_factor['power_max']), + 'EF_{0}'.format(in_p)] = emission_factor['EF_{0}'.format(in_p)] + elif not np.isnan(emission_factor['power_min']) and np.isnan(emission_factor['power_max']): + df.loc[df['P'] >= emission_factor['power_min'], 'EF_{0}'.format(in_p)] = emission_factor[ + 'EF_{0}'.format(in_p)] + else: + df['EF_{0}'.format(in_p)] = emission_factor['EF_{0}'.format(in_p)] + + return df.loc[:, ['EF_{0}'.format(in_p)]] + + nut_codes = np.unique(self.crop_distribution['NUT_code'].values.astype(np.int16)) + tech = np.unique(self.vehicle_ratio['technology'].values) + + database = pd.DataFrame(None, pd.MultiIndex.from_product( + [nut_codes, self.machinery_list, tech], names=['NUT_code', 'vehicle', 'technology'])) + database['N'] = database.groupby(['NUT_code', 'vehicle']).apply(get_n) + database['S'] = database.groupby(['NUT_code', 'vehicle', 'technology']).apply(get_s) + database.dropna(inplace=True) + database['T'] = database.groupby(['NUT_code', 'vehicle', 'technology']).apply(get_t) + database['P'] = database.groupby(['NUT_code', 'vehicle']).apply(get_p) + database['LF'] = database.groupby('vehicle').apply(get_lf) + for in_p in self.source_pollutants: + database['DF_{0}'.format(in_p)] = database.groupby(['vehicle', 'technology']).apply(get_df) + + database['EF_{0}'.format(in_p)] = database.groupby(['vehicle', 'technology'])[['P']].apply(get_ef) + + database[in_p] = database['N'] * database['S'] * database['T'] * database['P'] * database['LF'] * \ + database['DF_{0}'.format(in_p)] * database['EF_{0}'.format(in_p)] + + database.drop(columns=['DF_{0}'.format(in_p), 'EF_{0}'.format(in_p)], inplace=True) + + database.drop(columns=['N', 'S', 'T', 'P', 'LF'], inplace=True) + + database = database.groupby(['NUT_code', 'vehicle']).sum() + self.logger.write_time_log('AgriculturalMachinerySector', 'calcualte_yearly_emissions_by_nut_vehicle', + timeit.default_timer() - spent_time) + return database + + def calculate_monthly_emissions_by_nut(self, month): + spent_time = timeit.default_timer() + + def get_mf(df, month_num): + df['MF'] = self.monthly_profiles.loc[df.name, month_num] + return df.loc[:, ['MF']] + # month_distribution = self.crop_distribution.loc[:, ['FID', 'timezone', 'geometry']].copy() + dataframe = self.calcualte_yearly_emissions_by_nut_vehicle().reset_index() + dataframe['MF'] = dataframe.groupby('vehicle').apply( + lambda x: get_mf(x, month) + ) + dataframe[self.source_pollutants] = dataframe[self.source_pollutants].multiply(dataframe['MF'], axis=0) + + dataframe.drop(columns=['MF'], inplace=True) + + dataframe = dataframe.groupby('NUT_code').sum() + + self.logger.write_time_log('AgriculturalMachinerySector', 'calculate_monthly_emissions_by_nut', + timeit.default_timer() - spent_time) + return dataframe + + def distribute(self, dataframe): + spent_time = timeit.default_timer() + + def distribute_by_nut(df, nut_emissions): + aux = df.apply(lambda row: row * nut_emissions) + return aux.loc[:, self.source_pollutants] + + self.crop_distribution.reset_index(inplace=True) + self.crop_distribution[self.source_pollutants] = self.crop_distribution.groupby('NUT_code')['fraction'].apply( + lambda x: distribute_by_nut(x, dataframe.loc[int(x.name), self.source_pollutants]) + ) + self.crop_distribution.drop(columns=['fraction', 'NUT_code'], inplace=True) + timezones = self.crop_distribution.groupby('FID')[['timezone']].first() + self.crop_distribution = self.crop_distribution.reset_index().groupby('FID').sum() + + 
self.crop_distribution['timezone'] = timezones + self.crop_distribution.reset_index(inplace=True) + self.logger.write_time_log('AgriculturalMachinerySector', 'distribute', + timeit.default_timer() - spent_time) + return self.crop_distribution + + def add_dates(self, df_by_month): + spent_time = timeit.default_timer() + + df_list = [] + for tstep, date in enumerate(self.date_array): + df_aux = df_by_month[date.date().month].copy() + df_aux['date'] = pd.to_datetime(date, utc=True) + df_aux['date_utc'] = pd.to_datetime(date, utc=True) + df_aux['tstep'] = tstep + # df_aux = self.to_timezone(df_aux) + df_list.append(df_aux) + dataframe_by_day = pd.concat(df_list, ignore_index=True) + + dataframe_by_day = self.to_timezone(dataframe_by_day) + self.logger.write_time_log('AgriculturalMachinerySector', 'add_dates', timeit.default_timer() - spent_time) + return dataframe_by_day + + def calculate_hourly_emissions(self): + spent_time = timeit.default_timer() + + def get_wf(df): + """ + Get the Weekly Factor for the given dataframe depending on the date. + + :param df: DataFrame where find the weekly factor. df.name is the date. + :type df: DataFrame + + :return: DataFrame with only the WF column. + :rtype: DataFrame + """ + weekly_profile = self.calculate_rebalanced_weekly_profile(self.weekly_profiles.loc['default', :].to_dict(), + df.name) + df['WF'] = weekly_profile[df.name.weekday()] + return df.loc[:, ['WF']] + + def get_hf(df): + """ + Get the Hourly Factor for the given dataframe depending on the hour. + + :param df: DataFrame where find the hourly factor. df.name is the hour. + :type df: DataFrame + + :return: DataFrame with only the HF column. + :rtype: DataFrame + """ + hourly_profile = self.hourly_profiles.loc['default', :].to_dict() + hour_factor = hourly_profile[df.name] + + df['HF'] = hour_factor + return df.loc[:, ['HF']] + + self.crop_distribution['date_as_date'] = self.crop_distribution['date'].dt.date + self.crop_distribution['month'] = self.crop_distribution['date'].dt.weekday + self.crop_distribution['weekday'] = self.crop_distribution['date'].dt.weekday + self.crop_distribution['hour'] = self.crop_distribution['date'].dt.hour + + for pollutant in self.source_pollutants: + self.crop_distribution['WF'] = self.crop_distribution.groupby(['date_as_date']).apply(get_wf) + + self.crop_distribution['HF'] = self.crop_distribution.groupby('hour').apply(get_hf) + self.crop_distribution[pollutant] = self.crop_distribution[pollutant].multiply( + self.crop_distribution['HF'] * self.crop_distribution['WF'], axis=0) + + self.crop_distribution.drop(columns=['month', 'weekday', 'hour', 'WF', 'HF', 'date_as_date'], inplace=True) + self.logger.write_time_log('AgriculturalMachinerySector', 'calculate_hourly_emissions', + timeit.default_timer() - spent_time) + return self.crop_distribution + + def calculate_emissions(self): + spent_time = timeit.default_timer() + self.logger.write_log('\tCalculating emissions') + + distribution_by_month = {} + for month in self.months.iterkeys(): + distribution_by_month[month] = self.calculate_monthly_emissions_by_nut(month) + distribution_by_month[month] = self.distribute(distribution_by_month[month]) + + self.crop_distribution = self.add_dates(distribution_by_month) + self.crop_distribution.drop('date_utc', axis=1, inplace=True) + self.crop_distribution = self.calculate_hourly_emissions() + self.crop_distribution['layer'] = 0 + self.crop_distribution = self.crop_distribution.groupby(['FID', 'layer', 'tstep']).sum() + self.crop_distribution = 
self.speciate(self.crop_distribution)
+
+ self.logger.write_log('\t\tAgricultural machinery emissions calculated', message_level=2)
+ self.logger.write_time_log('AgriculturalMachinerySector', 'calculate_emissions',
+ timeit.default_timer() - spent_time)
+ return self.crop_distribution
diff --git a/hermesv3_bu/sectors/agricultural_sector.py b/hermesv3_bu/sectors/agricultural_sector.py
new file mode 100755
index 0000000000000000000000000000000000000000..cb8fbac85ff4f5274a39d343e0dd0b5227a7f5cf
--- /dev/null
+++ b/hermesv3_bu/sectors/agricultural_sector.py
@@ -0,0 +1,489 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import timeit
+
+import numpy as np
+import pandas as pd
+import geopandas as gpd
+from mpi4py import MPI
+
+from hermesv3_bu.sectors.sector import Sector
+from hermesv3_bu.io_server.io_shapefile import IoShapefile
+from hermesv3_bu.io_server.io_raster import IoRaster
+from hermesv3_bu.logger.log import Log
+
+
+class AgriculturalSector(Sector):
+ def __init__(self, comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, nut_shapefile,
+ source_pollutants, vertical_levels, crop_list, land_uses_path, land_use_by_nut, crop_by_nut,
+ crop_from_landuse_path, ef_files_dir, monthly_profiles_path, weekly_profiles_path,
+ hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path):
+ """
+ Initialise the common class for the agricultural sectors (fertilizers, crop operations and machinery).
+
+ :param comm_agr: Common communicator for all the agricultural sectors.
+ :type comm_agr: MPI.Comm
+
+ :param comm: Communicator for the current sector.
+ :type comm: MPI.Comm
+
+ :param logger: Logger
+ :type logger: Log
+
+ :param auxiliary_dir: Path to the directory where the necessary auxiliary files will be created if they do
+ not exist yet.
+ :type auxiliary_dir: str
+
+ :param grid_shp: Shapefile with the grid horizontal distribution.
+ :type grid_shp: GeoDataFrame
+
+ :param date_array: List of datetimes.
+ :type date_array: list(datetime.datetime, ...)
+
+ :param source_pollutants: List of input pollutants to take into account.
+ :type source_pollutants: list
+
+ :param vertical_levels: List of top level of each vertical layer.
+ :type vertical_levels: list
+
+ :param nut_shapefile: Path to the shapefile that contains the NUT codes.
+ :type nut_shapefile: str
+
+ :param crop_list: List of crops to take into account for that sector.
+ :type crop_list: list
+
+ :param land_uses_path: Path to the shapefile that contains all the land uses.
+ :type land_uses_path: str
+
+ :param land_use_by_nut: Path to the DataFrame with the area for each land use of each NUT code.
+ columns: NUT, land_use, area
+ :type land_use_by_nut: str
+
+ :param crop_by_nut: Path to the DataFrame with the amount of crops for each NUT code.
+ That DataFrame has the 'code' column with the NUT code and as many columns as crops.
+ :type crop_by_nut: str
+
+ :param crop_from_landuse_path: Path to the DataFrame with the mapping between crops and land uses.
+ That CSV uses a semicolon as column separator and a comma between elements of the same column.
+ The following columns are needed: crop, land_use and weight.
+ The land_use and weight columns can have as many elements as needed, separated by commas, but both have
+ to have the same length.
+ The land_use column contains the list, or single value, of the land uses that contain that crop.
+ The weight column contains the weight of each selected land use.
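+ For example (illustrative codes only), a row such as "wheat;12,13;0.7,0.3" maps the wheat
+ crop to land uses 12 and 13 with weights 0.7 and 0.3.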
+ :type crop_from_landuse_path: str + + :param ef_files_dir: Path to the folder that contains all the Emission Factors. + :type ef_files_dir: str + + :param monthly_profiles_path: Path to the CSV file that contains all the monthly profiles. The CSV file must + contain the following columns [P_month, January, February, March, April, May, June, July, August, September, + October, November, December] + :type monthly_profiles_path: str + + :param weekly_profiles_path: Path to the CSV file that contains all the weekly profiles. The CSV file must + contain the following columns [P_week, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday] + :type weekly_profiles_path: str + + :param hourly_profiles_path: Path to the CSV file that contains all the hourly profiles. The CSV file must + contain the following columns [P_hour, 0, 1, 2, 3, ..., 22, 23] + :type hourly_profiles_path: str + + :param speciation_map_path: Path to the CSV file that contains the speciation map. The CSV file must contain + the following columns [dst, src, description] + The 'dst' column will be used as output pollutant list and the 'src' column as their onw input pollutant + to be used as a fraction in the speciation profiles. + :type speciation_map_path: str + + :param speciation_profiles_path: Path to the file that contains all the speciation profiles. The CSV file + must contain the "Code" column with the value of each animal of the animal_list. The rest of columns + have to be the sames as the column 'dst' of the 'speciation_map_path' file. + :type speciation_profiles_path: str + + :param molecular_weights_path: Path to the CSV file that contains all the molecular weights needed. The CSV + file must contain the 'Specie' and 'MW' columns. + :type molecular_weights_path: str + """ + spent_time = timeit.default_timer() + + super(AgriculturalSector, self).__init__( + comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path) + + self.comm_agr = comm_agr + self.nut_shapefile = nut_shapefile + self.crop_list = crop_list + self.land_uses_path = land_uses_path + self.ef_files_dir = ef_files_dir + self.land_use_by_nut = land_use_by_nut + self.crop_by_nut = crop_by_nut + self.crop_from_landuse = self.get_crop_from_land_uses(crop_from_landuse_path) + self.crop_distribution = self.get_crops_by_dst_cell( + os.path.join(auxiliary_dir, 'agriculture', 'crops', 'crops.shp')) + self.logger.write_time_log('AgriculturalSector', '__init__', timeit.default_timer() - spent_time) + + def involved_grid_cells(self, src_shp): + spent_time = timeit.default_timer() + grid_shp = IoShapefile(self.comm).split_shapefile(self.grid_shp) + src_union = src_shp.to_crs(grid_shp.crs).geometry.unary_union + grid_shp = grid_shp.loc[grid_shp.intersects(src_union), :] + + grid_shp_list = self.comm.gather(grid_shp, root=0) + animal_dist_list = [] + if self.comm.Get_rank() == 0: + for small_grid in grid_shp_list: + animal_dist_list.append(src_shp.loc[src_shp.intersects( + small_grid.to_crs(src_shp.crs).geometry.unary_union), :]) + grid_shp = pd.concat(grid_shp_list) + grid_shp = np.array_split(grid_shp, self.comm.Get_size()) + else: + grid_shp = None + animal_dist_list = None + + grid_shp = self.comm.scatter(grid_shp, root=0) + + animal_dist = self.comm.scatter(animal_dist_list, root=0) + + self.logger.write_time_log('AgriculturalSector', 'involved_grid_cells', timeit.default_timer() 
- spent_time) + + return grid_shp, animal_dist + + def calculate_num_days(self): + spent_time = timeit.default_timer() + + day_array = [hour.date() for hour in self.date_array] + days, num_days = np.unique(day_array, return_counts=True) + + day_dict = {} + for key, value in zip(days, num_days): + day_dict[key] = value + self.logger.write_time_log('AgriculturalSector', 'calculate_num_days', timeit.default_timer() - spent_time) + return day_dict + + def get_crop_from_land_uses(self, crop_from_landuse_path): + import re + spent_time = timeit.default_timer() + + crop_from_landuse = pd.read_csv(crop_from_landuse_path, sep=';') + crop_dict = {} + for i, element in crop_from_landuse.iterrows(): + # if element.crop in self.crop_list: + land_uses = list(map(str, re.split(' , |, | ,|,| ', element.land_use))) + weights = list(map(str, re.split(' , |, | ,|,| ', element.weight))) + crop_dict[element.crop] = zip(land_uses, weights) + self.logger.write_time_log('AgriculturalSector', 'get_crop_from_land_uses', timeit.default_timer() - spent_time) + + return crop_dict + + def get_involved_land_uses(self): + spent_time = timeit.default_timer() + + land_uses_list = [] + for land_use_and_weight_list in self.crop_from_landuse.itervalues(): + for land_use_and_weight in land_use_and_weight_list: + land_use = int(land_use_and_weight[0]) + if land_use not in land_uses_list: + land_uses_list.append(land_use) + self.logger.write_time_log('AgriculturalSector', 'get_involved_land_uses', timeit.default_timer() - spent_time) + + return land_uses_list + + def get_land_use_src_by_nut_old(self, land_uses): + spent_time = timeit.default_timer() + + df_land_use_with_nut = gpd.read_file(self.land_uses_path) + + df_land_use_with_nut.rename(columns={'CODE': 'NUT', 'gridcode': 'land_use'}, inplace=True) + + df_land_use_with_nut = df_land_use_with_nut.loc[df_land_use_with_nut['land_use'].isin(land_uses), :] + + df_land_use_with_nut = self.spatial_overlays(df_land_use_with_nut, + self.clip.shapefile.to_crs(df_land_use_with_nut.crs)) + + self.logger.write_time_log('AgriculturalSector', 'get_land_use_src_by_nut', timeit.default_timer() - spent_time) + return df_land_use_with_nut + + def get_land_use_src_by_nut(self, land_uses): + spent_time = timeit.default_timer() + land_use_src_by_nut_path = os.path.join(self.auxiliary_dir, 'agriculture', 'land_uses', 'land_uses_src.shp') + if not os.path.exists(land_use_src_by_nut_path): + land_uses_clipped = IoRaster(self.comm_agr).clip_raster_with_shapefile_poly( + self.land_uses_path, self.clip.shapefile, + os.path.join(self.auxiliary_dir, 'agriculture', 'land_uses', 'land_uses_clip.tif'), values=land_uses) + + land_uses_shp = IoRaster(self.comm_agr).to_shapefile_serie(land_uses_clipped) + ccaa_shp = IoShapefile(self.comm_agr).read_shapefile_serial(self.nut_shapefile).to_crs(land_uses_shp.crs) + ccaa_shp.drop(columns=['NAME', 'ORDER06'], inplace=True) + ccaa_shp.rename(columns={'CODE': 'NUT'}, inplace=True) + land_use_src_by_nut = self.spatial_overlays(land_uses_shp, ccaa_shp, how='intersection') + land_use_src_by_nut.drop(columns=['idx1', 'idx2', 'CELL_ID'], inplace=True) + land_use_src_by_nut.rename(columns={'data': 'land_use'}, inplace=True) + land_use_src_by_nut['land_use'] = land_use_src_by_nut['land_use'].astype(np.int16) + land_use_src_by_nut.reset_index(inplace=True, drop=True) + IoShapefile(self.comm_agr).write_shapefile_serial(land_use_src_by_nut, land_use_src_by_nut_path) + else: + land_use_src_by_nut = IoShapefile(self.comm_agr).read_shapefile_serial(land_use_src_by_nut_path) + + 
self.logger.write_time_log('AgriculturalSector', 'get_land_use_src_by_nut', timeit.default_timer() - spent_time) + return land_use_src_by_nut + + def get_tot_land_use_by_nut(self, land_uses): + spent_time = timeit.default_timer() + df = pd.read_csv(self.land_use_by_nut) + df = df.loc[df['land_use'].isin(land_uses), :] + self.logger.write_time_log('AgriculturalSector', 'get_tot_land_use_by_nut', timeit.default_timer() - spent_time) + + return df + + def get_land_use_by_nut_csv(self, land_use_distribution_src_nut, land_uses): + """ + + :param land_use_distribution_src_nut: Shapefile with the polygons of all the land uses for each NUT. + :type land_use_distribution_src_nut: GeoDataFrame + + :param land_uses: Land uses to take into account. + :type land_uses: list + + :return: + """ + spent_time = timeit.default_timer() + + land_use_distribution_src_nut['area'] = land_use_distribution_src_nut.area + land_use_by_nut = land_use_distribution_src_nut.groupby(['NUT', 'land_use']).sum().reset_index() + land_use_by_nut = land_use_by_nut.loc[land_use_by_nut['land_use'].isin(land_uses), :] + + self.logger.write_time_log('AgriculturalSector', 'get_land_use_by_nut_csv', timeit.default_timer() - spent_time) + return land_use_by_nut + + def land_use_to_crop_by_nut(self, land_use_by_nut, nuts=None): + """ + Get the amount of crop by involved NUT. + + :param land_use_by_nut: Area of each land use for each NUT + :type land_use_by_nut: DataFrame + + :param nuts: NUT list to take into account. None for all of them. + :type nuts: list + + :return: Amount of crop by NUT. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + if nuts is not None: + land_use_by_nut = land_use_by_nut.loc[land_use_by_nut['NUT'].isin(nuts), :] + new_dict = pd.DataFrame() + for nut in np.unique(land_use_by_nut['NUT']): + aux_dict = {'NUT': [nut]} + + for crop, landuse_weight_list in self.crop_from_landuse.iteritems(): + aux = 0 + for landuse, weight in landuse_weight_list: + try: + aux += land_use_by_nut.loc[(land_use_by_nut['land_use'] == int(landuse)) & + (land_use_by_nut['NUT'] == nut), 'area'].values[0] * float(weight) + except IndexError: + # TODO understand better that error + pass + aux_dict[crop] = [aux] + new_dict = new_dict.append(pd.DataFrame.from_dict(aux_dict), ignore_index=True) + new_dict.set_index('NUT', inplace=True) + + self.logger.write_time_log('AgriculturalSector', 'land_use_to_crop_by_nut', timeit.default_timer() - spent_time) + return new_dict + + def get_crop_shape_by_nut(self, crop_by_nut, tot_crop_by_nut): + """ + Calculate the fraction of crop for each NUT involved on the simulated domain. + + :param crop_by_nut: Amount of crop by NUT on the simulated domain. + :type crop_by_nut: DataFrame + + :param tot_crop_by_nut: Total amount of crop by NUT. + :type tot_crop_by_nut: DataFrame + + :return: Fraction of involved crop for NUT. + :rtype: DataFrame( + """ + spent_time = timeit.default_timer() + + crop_share_by_nut = crop_by_nut.copy() + for crop in crop_by_nut.columns: + crop_share_by_nut[crop] = crop_by_nut[crop] / tot_crop_by_nut[crop] + self.logger.write_time_log('AgriculturalSector', 'get_crop_shape_by_nut', timeit.default_timer() - spent_time) + + return crop_share_by_nut + + def get_crop_area_by_nut(self, crop_share_by_nut): + """ + Calculate the amount of crop for each NUT. + + :param crop_share_by_nut: GeoDataFrame with the fraction of crop for each NUT. That fraction means the quantity + of the NUT crop involved on the simulation. If the complete NUT is fulfilled on the domain is it 1. 
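+ For example (illustrative), a share of 0.25 for a given NUT and crop means that one quarter
+ of that crop in the NUT falls inside the simulated domain.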
+ :type crop_share_by_nut: DataFrame + + :return: Amount of crop for each NUT. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + crop_by_nut = pd.read_csv(self.crop_by_nut) + crop_by_nut.drop(columns='name', inplace=True) + + crop_by_nut['code'] = crop_by_nut['code'].astype(np.int16) + crop_by_nut.set_index('code', inplace=True) + crop_by_nut = crop_by_nut.loc[crop_share_by_nut.index, :] + crop_area_by_nut = crop_share_by_nut * crop_by_nut + + self.logger.write_time_log('AgriculturalSector', 'get_crop_area_by_nut', timeit.default_timer() - spent_time) + return crop_area_by_nut + + def calculate_crop_distribution_src(self, crop_area_by_nut, land_use_distribution_src_nut): + """ + Calculate the crop distribution on the source resolution. + + :param crop_area_by_nut: Amount of crop on each NUT. + :type crop_area_by_nut: DataFrame + + :param land_use_distribution_src_nut: Source distribution land uses with their calculated areas. + :type land_use_distribution_src_nut: GeoDataFrame + + :return: Crop distribution on the source resolution. + :rtype: GeoDataFrame + """ + spent_time = timeit.default_timer() + + crop_distribution_src = land_use_distribution_src_nut.loc[:, ['NUT', 'geometry']] + for crop, landuse_weight_list in self.crop_from_landuse.iteritems(): + crop_distribution_src[crop] = 0 + for landuse, weight in landuse_weight_list: + crop_distribution_src.loc[land_use_distribution_src_nut['land_use'] == int(landuse), crop] += \ + land_use_distribution_src_nut.loc[land_use_distribution_src_nut['land_use'] == int(landuse), + 'area'] * float(weight) + for nut in np.unique(crop_distribution_src['NUT']): + for crop in crop_area_by_nut.columns.values: + crop_distribution_src.loc[crop_distribution_src['NUT'] == nut, crop] /= crop_distribution_src.loc[ + crop_distribution_src['NUT'] == nut, crop].sum() + for nut in np.unique(crop_distribution_src['NUT']): + for crop in crop_area_by_nut.columns.values: + + crop_distribution_src.loc[crop_distribution_src['NUT'] == nut, crop] *= \ + crop_area_by_nut.loc[nut, crop] + self.logger.write_time_log('AgriculturalSector', 'calculate_crop_distribution_src', + timeit.default_timer() - spent_time) + + return crop_distribution_src + + def get_crop_distribution_in_dst_cells(self, crop_distribution): + """ + Regrid the crop distribution in the source resolution to the grid resolution. + + :param crop_distribution: Crop distribution in source resolution. + :type crop_distribution: pandas.GeoDataFrame + + :return: Crop by grid cell. 
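+ The regridding is area-weighted: each source polygon is intersected with the destination grid
+ and its crop area is split among the intersecting cells in proportion to the intersected area
+ (the 'src_inter_fraction' factor computed below).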
+ :rtype: pandas.GeoDataFrame + """ + spent_time = timeit.default_timer() + crop_list = list(np.setdiff1d(crop_distribution.columns.values, ['NUT', 'geometry'])) + + crop_distribution = crop_distribution.to_crs(self.grid_shp.crs) + crop_distribution['src_inter_fraction'] = crop_distribution.geometry.area + crop_distribution = self.spatial_overlays(crop_distribution, self.grid_shp, how='intersection') + crop_distribution['src_inter_fraction'] = \ + crop_distribution.geometry.area / crop_distribution['src_inter_fraction'] + + crop_distribution[crop_list] = crop_distribution.loc[:, crop_list].multiply( + crop_distribution["src_inter_fraction"], axis="index") + + crop_distribution = crop_distribution.loc[:, crop_list + ['FID']].groupby('FID').sum() + + crop_distribution = gpd.GeoDataFrame(crop_distribution, crs=self.grid_shp.crs, + geometry=self.grid_shp.loc[crop_distribution.index, 'geometry']) + crop_distribution.reset_index(inplace=True) + crop_distribution.set_index('FID', inplace=True) + + self.logger.write_time_log('AgriculturalSector', 'get_crop_distribution_in_dst_cells', + timeit.default_timer() - spent_time) + return crop_distribution + + def get_crops_by_dst_cell(self, file_path): + """ + Create, or read if it is already created, the crop distribution over the grid cells. + + The created crop distribution file contains all the available crops, but the returned shapefile only contains + the involved crops on that sector. + + :param file_path: Path to the auxiliary file where is stored the crop distribution, or will be stored. + :type file_path: str + + :return: GeoDataFrame with the crop distribution over the grid cells. + :rtype: GeoDataFrame + """ + spent_time = timeit.default_timer() + if not os.path.exists(file_path): + if self.comm_agr.Get_rank() == 0: + self.logger.write_log('Creating the crop distribution shapefile on the grid resolution.', + message_level=2) + involved_land_uses = self.get_involved_land_uses() + + land_use_distribution_src_nut = self.get_land_use_src_by_nut(involved_land_uses) + + land_use_by_nut = self.get_land_use_by_nut_csv(land_use_distribution_src_nut, involved_land_uses) + tot_land_use_by_nut = self.get_tot_land_use_by_nut(involved_land_uses) + + crop_by_nut = self.land_use_to_crop_by_nut(land_use_by_nut) + tot_crop_by_nut = self.land_use_to_crop_by_nut( + tot_land_use_by_nut, nuts=list(np.unique(land_use_by_nut['NUT']))) + + crop_shape_by_nut = self.get_crop_shape_by_nut(crop_by_nut, tot_crop_by_nut) + crop_area_by_nut = self.get_crop_area_by_nut(crop_shape_by_nut) + + crop_distribution_src = self.calculate_crop_distribution_src( + crop_area_by_nut, land_use_distribution_src_nut) + + crop_distribution_dst = self.get_crop_distribution_in_dst_cells(crop_distribution_src) + + crop_distribution_dst = self.add_timezone(crop_distribution_dst) + IoShapefile(self.comm).write_shapefile_serial(crop_distribution_dst, file_path) + else: + self.logger.write_log('Waiting for the master process that creates the crop distribution shapefile.', + message_level=2) + crop_distribution_dst = None + self.comm_agr.Barrier() + if self.comm.Get_rank() == 0 and self.comm_agr.Get_rank() != 0: + # Every master rank read the created crop distribution shapefile. 
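+ # The file was written once by the global agricultural master (comm_agr rank 0); here the
+ # master of every other sector communicator reads it and later scatters it among its own
+ # workers with split_shapefile.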
+ crop_distribution_dst = IoShapefile(self.comm).read_shapefile_serial(file_path) + self.comm.Barrier() + + crop_distribution_dst = IoShapefile(self.comm).split_shapefile(crop_distribution_dst) + else: + crop_distribution_dst = IoShapefile(self.comm).read_shapefile_parallel(file_path) + crop_distribution_dst.set_index('FID', inplace=True, drop=True) + # Filtering crops by used on the subsector (operations, fertilizers, machinery) + crop_distribution_dst = crop_distribution_dst.loc[:, self.crop_list + ['timezone', 'geometry']] + + self.logger.write_time_log('AgriculturalSector', 'get_crops_by_dst_cell', timeit.default_timer() - spent_time) + return crop_distribution_dst + + @staticmethod + def get_agricultural_processor_list(sector_dict): + """ + Select the common ranks for that ones that will work on some agricultural sector. + + The agricultural sectors are 'crop_operations', 'crop_fertilizers' and 'agricultural_machinery'. + + :param sector_dict: Rank distribution for all the sectors. + :type sector_dict: dict + + :return: List of ranks involved on some agricultural sector. + :rtype: list + """ + rank_list = [] + + for sector, sector_procs in sector_dict.iteritems(): + if sector in ['crop_operations', 'crop_fertilizers', 'agricultural_machinery']: + rank_list += sector_procs + rank_list = sorted(rank_list) + return rank_list diff --git a/hermesv3_bu/sectors/aviation_sector.py b/hermesv3_bu/sectors/aviation_sector.py new file mode 100755 index 0000000000000000000000000000000000000000..a9718fd42da441ca2343a66888fdd8fbacee342c --- /dev/null +++ b/hermesv3_bu/sectors/aviation_sector.py @@ -0,0 +1,1039 @@ +#!/usr/bin/env python + +import sys +import os +import timeit +from hermesv3_bu.logger.log import Log +import numpy as np +import pandas as pd +import geopandas as gpd +from warnings import warn + +from hermesv3_bu.sectors.sector import Sector + +PHASE_TYPE = {'taxi_out': 'departure', 'pre-taxi_out': 'departure', 'takeoff': 'departure', 'climbout': 'departure', + 'approach': 'arrival', 'taxi_in': 'arrival', 'post-taxi_in': 'arrival', 'landing': 'arrival', + 'landing_wear': 'arrival'} +PHASE_EF_FILE = {'taxi_out': 'ef_taxi.csv', 'pre-taxi_out': 'ef_apu.csv', 'takeoff': 'ef_takeoff.csv', + 'climbout': 'ef_climbout.csv', 'approach': 'ef_approach.csv', 'taxi_in': 'ef_taxi.csv', + 'post-taxi_in': 'ef_apu.csv', 'landing': 'ef_approach.csv', 'landing_wear': 'ef_landing_wear.csv'} + + +class AviationSector(Sector): + """ + The aviation module divide the emissions into 9 emission phases (4 for departure and 5 for arrival) + - Departure: + - Pre-taxi out + - Taxi out + - Take off + - Climb out + - Arrival: + - Final approach + - Landing + - Landing wear + - Taxi in + - Post-taxi in + """ + def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + airport_list, plane_list, airport_shapefile_path, airport_runways_shapefile_path, + airport_runways_corners_shapefile_path, airport_trajectories_shapefile_path, operations_path, + planes_path, times_path, ef_dir, weekly_profiles_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path): + """ + :param comm: Communicator for the sector calculation. + :type comm: MPI.COMM + + :param logger: Logger + :type logger: Log + + :param auxiliary_dir: Path to the directory where the necessary auxiliary files will be created if them are not + created yet. + :type auxiliary_dir: str + + :param grid_shp: Shapefile with the grid horizontal distribution. 
+ :type grid_shp: GeoDataFrame + + :param date_array: List of datetimes. + :type date_array: list(datetime.datetime, ...) + + :param source_pollutants: List of input pollutants to take into account. + :type source_pollutants: list + + :param vertical_levels: List of top level of each vertical layer. + :type vertical_levels: list + + :param airport_list: List of airports to take into account. + :type airport_list: list + + :param plane_list: List of planes to take into account. + :type plane_list: list + + :param airport_shapefile_path: Path to the shapefile that contains the airport polygons. + :type airport_shapefile_path: str + + :param airport_runways_shapefile_path: Path to the shapefile that contains the runways lines. + :type airport_runways_shapefile_path: str + + :param airport_runways_corners_shapefile_path: Path to the shapefile that contains the runway starting points. + :type airport_runways_corners_shapefile_path: str + + :param airport_trajectories_shapefile_path: Path to the shapefile that contains the trajectories lines. + :type airport_trajectories_shapefile_path: str + + :param operations_path: Path to the CSV that contains the operations information by month. + columns: plane_id, airport_id, operation, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 + :type operations_path: str + + :param planes_path: Path to the CSV that contains the planes information: + columns: plane_id, engine_id, engine_n, mtow, apu_id, plane_type + :type planes_path: str + + :param times_path: Path to the CSV that contains the times information. + columns: airport_id, plane_type, taxi_out, taxi_in, takeoff, climbout, approach, landing, post-taxi_in, + pre-taxi_out + :type times_path: str + + :param ef_dir: Path to the directory that contains all the emission factors files. That folder must contain the + following emission factor files: ef_approach.csv, ef_apu.csv, ef_climbout.csv, ef_landing.csv, + ef_landing_wear.csv, ef_takeoff.csv and ef_taxi.csv. + :type ef_dir: str + + :param weekly_profiles_path: Path to the CSV file that contains all the weekly profiles. The CSV file must + contain the following columns [P_week, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday] + The P_week code have to be the input pollutant. + :type weekly_profiles_path: str + + :param hourly_profiles_path: Path to the CSV file that contains all the hourly profiles. The CSV file must + contain the following columns [P_hour, 0, 1, 2, 3, ..., 22, 23] + The P_week code have to be the input pollutant. + :type hourly_profiles_path: str + + :param speciation_map_path: Path to the CSV file that contains the speciation map. The CSV file must contain + the following columns [dst, src, description] + The 'dst' column will be used as output pollutant list and the 'src' column as their onw input pollutant + to be used as a fraction in the speciation profiles. + :type speciation_map_path: str + + :param speciation_profiles_path: Path to the file that contains all the speciation profiles. The CSV file + must contain the "Code" column with the value of each animal of the animal_list. The rest of columns + have to be the sames as the column 'dst' of the 'speciation_map_path' file. + :type speciation_profiles_path: str + + :param molecular_weights_path: Path to the CSV file that contains all the molecular weights needed. The CSV + file must contain the 'Specie' and 'MW' columns. 
+ :type molecular_weights_path: str + """ + spent_time = timeit.default_timer() + logger.write_log('===== AVIATION SECTOR =====') + super(AviationSector, self).__init__( + comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, None, + weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, + molecular_weights_path) + + if 'hc' in self.source_pollutants: + for poll in ['nmvoc', 'ch4']: + if poll not in self.source_pollutants: + self.source_pollutants.append(poll) + self.source_pollutants.remove('hc') + + # self.ef_dir = ef_dir + self.ef_files = self.read_ef_files(ef_dir) + + airport_trajectories_shapefile = self.read_trajectories_shapefile( + airport_trajectories_shapefile_path, airport_runways_corners_shapefile_path, airport_runways_shapefile_path) + self.airport_list_full = None # only needed for master process + self.airport_list = self.get_airport_list(airport_list, airport_trajectories_shapefile, operations_path) + self.plane_list = plane_list + + full_airport_shapefile = gpd.read_file(airport_shapefile_path) + full_airport_shapefile.drop(columns='airport_na', inplace=True) + full_airport_shapefile.set_index('airport_id', inplace=True) + self.airport_shapefile = full_airport_shapefile.loc[self.airport_list, ['geometry']] + + self.operations = self.read_operations_update_plane_list(operations_path) + self.planes_info = self.read_planes(planes_path) + self.times_info = self.read_times_info(times_path) + + runway_shapefile = self.read_runway_shapefile(airport_runways_shapefile_path) + self.airport_distribution = self.calculate_airport_distribution(full_airport_shapefile) + self.runway_arrival_distribution = self.calculate_runway_distribution(runway_shapefile, 'arrival') + self.runway_departure_distribution = self.calculate_runway_distribution(runway_shapefile, 'departure') + + self.trajectory_departure_distribution = self.calculate_trajectories_distribution( + airport_trajectories_shapefile, 'departure') + self.trajectory_arrival_distribution = self.calculate_trajectories_distribution( + airport_trajectories_shapefile, 'arrival') + comm.Barrier() + self.logger.write_time_log('AviationSector', '__init__', timeit.default_timer() - spent_time) + + def read_ef_files(self, ef_path): + if self.comm.Get_rank() == 0: + ef_files = {} + for phase in PHASE_TYPE.keys(): + ef_files[phase] = pd.read_csv(os.path.join(ef_path, PHASE_EF_FILE[phase])) + else: + ef_files = None + + ef_files = self.comm.bcast(ef_files, root=0) + + return ef_files + + def read_trajectories_shapefile(self, trajectories_path, runways_corners_path, runways_path): + """ + Create a shapefile with 2 geometries: trajectories & staring point + + :param trajectories_path: Path to the trajectories shapefile. + :type trajectories_path: str + + :param runways_corners_path: Path to the trajectories starting point path. + :type runways_corners_path: str + + :param runways_path: Path to the shapefile that contains the runways and their fraction of use. + :type runways_path: str + + :return: GeoDataFrame with the trajectories information, their praction and staring point. 
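The read-once-and-broadcast pattern used by `read_ef_files` boils down to the sketch below. The directory and file names are placeholders; only the master rank touches the disk and every rank ends up with the same dictionary of DataFrames.

```python
# Sketch of the master-reads / broadcast pattern used for the emission factor files.
# The directory and file names below are placeholders, not the real datasets.
import os
import pandas as pd
from mpi4py import MPI

comm = MPI.COMM_WORLD
ef_dir = '/path/to/ef_files'                                   # hypothetical directory
phase_files = {'taxi_out': 'ef_taxi.csv', 'takeoff': 'ef_takeoff.csv'}

if comm.Get_rank() == 0:
    ef_files = {phase: pd.read_csv(os.path.join(ef_dir, name))
                for phase, name in phase_files.items()}
else:
    ef_files = None

# Every rank receives the same dictionary of DataFrames without touching the disk.
ef_files = comm.bcast(ef_files, root=0)
```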
+ :rtype: GeoDataFrame + """ + spent_time = timeit.default_timer() + trajectories = gpd.read_file(trajectories_path) + + corners = gpd.read_file(runways_corners_path).to_crs(trajectories.crs) + corners.rename(columns={'geometry': 'start_point'}, inplace=True) + + runways = gpd.read_file(runways_path).to_crs(trajectories.crs) + runways.rename(columns={'approach_f': 'arrival_f', 'climbout_f': 'departure_f'}, inplace=True) + + trajectories = trajectories.merge(corners[['runway_id', 'start_point']], on='runway_id', how='left') + trajectories = trajectories.merge(runways[['runway_id', 'arrival_f', 'departure_f']], on='runway_id', + how='left') + trajectories.loc[trajectories['operation'] == 'departure', 'fraction'] = trajectories['departure_f'] + trajectories.loc[trajectories['operation'] == 'arrival', 'fraction'] = trajectories['arrival_f'] + + trajectories.drop(columns=['arrival_f', 'departure_f'], inplace=True) + trajectories.set_index(['runway_id', 'operation'], inplace=True) + self.logger.write_time_log('AviationSector', 'read_trajectories_shapefile', timeit.default_timer() - spent_time) + + return trajectories + + def read_runway_shapefile(self, airport_runways_shapefile_path): + """ + The master process reads the runway shapefile. + + :param airport_runways_shapefile_path: Path to the shapefile that contains the runways. + :type airport_runways_shapefile_path: str + + :return: GeoDataFrame with the runways information. + :rtype: GeoDataFrame, None + """ + spent_time = timeit.default_timer() + if self.comm.Get_rank() == 0: + runway_shapefile = gpd.read_file(airport_runways_shapefile_path) + runway_shapefile.set_index('airport_id', inplace=True) + runway_shapefile = runway_shapefile.loc[self.airport_list_full, :] + runway_shapefile = runway_shapefile.loc[runway_shapefile['cons'] == 1, + ['approach_f', 'climbout_f', 'geometry']] + runway_shapefile.rename(columns={'approach_f': 'arrival_f', 'climbout_f': 'departure_f'}, inplace=True) + else: + runway_shapefile = None + self.logger.write_time_log('AviationSector', 'read_runway_shapefile', timeit.default_timer() - spent_time) + + return runway_shapefile + + def read_hourly_profiles(self, path): + """ + Read the Dataset of the hourly profiles with the hours (int) as columns. + + Overwrites the super method. + + :param path: Path to the file that contains the monthly profiles. + :type path: str + + :return: Dataset od the monthly profiles. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + if path is None: + profiles = None + else: + profiles = pd.read_csv(path) + profiles.rename( + columns={"operation": -3, "day_type": -2, 'P_hour': -1, '00': 0, '01': 1, '02': 2, '03': 3, '04': 4, + '05': 5, '06': 6, '07': 7, '08': 8, '09': 9, '10': 10, '11': 11, '12': 12, '13': 13, '14': 14, + '15': 15, '16': 16, '17': 17, '18': 18, '19': 19, '20': 20, '21': 21, '22': 22, '23': 23}, + inplace=True) + profiles.columns = profiles.columns.astype(int) + profiles.rename(columns={-1: 'P_hour', -3: "operation", -2: "day_type"}, inplace=True) + profiles.set_index(["P_hour", "operation", "day_type"], inplace=True) + + self.logger.write_time_log('AviationSector', 'read_hourly_profiles', timeit.default_timer() - spent_time) + + return profiles + + def read_operations_update_plane_list(self, operations_csv_path): + """ + Read the operations CSV file and update the plane_list argument. + + If plane_list is not set in the configuration file it will be set using the plane_codes of the selected + airports. 
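The column normalisation done in `read_hourly_profiles` (string hour labels converted to integer columns, with the key columns parked on temporary negative labels so the whole header can be cast to int) can be illustrated with a toy two-hour profile:

```python
# Toy two-hour profile showing the header normalisation: string hour labels become
# integer columns while the key columns are parked on temporary negative labels.
import pandas as pd

profiles = pd.DataFrame({'operation': ['arrival'], 'day_type': ['weekday'],
                         'P_hour': ['default'], '00': [0.04], '01': [0.05]})

profiles.rename(columns={'operation': -3, 'day_type': -2, 'P_hour': -1, '00': 0, '01': 1}, inplace=True)
profiles.columns = profiles.columns.astype(int)      # every column label is now an int
profiles.rename(columns={-3: 'operation', -2: 'day_type', -1: 'P_hour'}, inplace=True)
profiles.set_index(['P_hour', 'operation', 'day_type'], inplace=True)

print(profiles.loc[('default', 'arrival', 'weekday'), 0])   # 0.04
```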
+ + :param operations_csv_path: Path to the CSV that contains the operations information by plane, airport, and + phase. The cSC must contain the following columns: [plane_id, airport_id, operation, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12] with the number of operations by month. + :type operations_csv_path: str + + :return: DataFrame with the amount operations by month. The operations are detailed with the plane_code, airport + and phase. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + check = False + operations = pd.read_csv(operations_csv_path) + + if check: + for index, aux_operations in operations.groupby(['airport_id', 'plane_id', 'operation']): + if len(aux_operations) > 1: + print index, len(aux_operations) + if self.plane_list is None: + self.plane_list = list(np.unique(operations['plane_id'].values)) + else: + operations = operations.loc[operations['plane_id'].isin(self.plane_list), :] + + if len(operations) == 0: + raise NameError("The plane/s defined in the plane_list do not exist.") + operations = operations.loc[operations['airport_id'].isin(self.airport_list), :] + operations.set_index(['airport_id', 'plane_id', 'operation'], inplace=True) + operations.rename(columns={'1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, '10': 10, + '11': 11, '12': 12}, inplace=True) + self.logger.write_time_log('AviationSector', 'read_operations_update_plane_list', + timeit.default_timer() - spent_time) + + return operations + + def read_planes(self, planes_path): + """ + Read the CSV with the planes information. + + :param planes_path: Path to the CSV file that contains the planes information. + :type planes_path: str + + :return: Dataframe with the planes information + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + check = False + dataframe = pd.read_csv(planes_path) + dataframe = dataframe.loc[dataframe['plane_id'].isin(self.plane_list)] + if check: + for index, aux_operations in dataframe.groupby('plane_id'): + if len(aux_operations) > 1: + print index, len(aux_operations) + dataframe.set_index('plane_id', inplace=True) + self.logger.write_time_log('AviationSector', 'read_planes', timeit.default_timer() - spent_time) + + return dataframe + + def read_times_info(self, times_path): + """ + Read the CSV file that contains the time spent on each phase. + + :param times_path: Path to the CSV file that contains the time spent on each phase. + :type times_path: str + + :return: Dataframe with the times of each phase + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + dataframe = pd.read_csv(times_path) + dataframe = dataframe.loc[dataframe['airport_id'].isin(self.airport_list)] + dataframe.set_index(['airport_id', 'plane_type'], inplace=True) + self.logger.write_time_log('AviationSector', 'read_times_info', timeit.default_timer() - spent_time) + + return dataframe + + def get_airport_list(self, conf_airport_list, airport_shapefile, operations_file): + """ + Get the airport list from the involved airports on the domain. + + It will select only the involved airports that are inside the grid. + If the argument 'airport_list' is set in the configuration file it will use the ones of that list that are into + the grid. + + :param conf_airport_list: List of airport codes from the configuration file (or None). + :type conf_airport_list: list + + :param airport_shapefile: Shapefile with the 'ICAO' information. + :type airport_shapefile: GeoDataFrame + + :return: List with the airports to calculate. 
+ :rtype: list
+ """
+ spent_time = timeit.default_timer()
+ if self.comm.Get_rank() == 0:
+ airport_shapefile = airport_shapefile.reset_index()
+ airport_shapefile = gpd.sjoin(airport_shapefile.to_crs(self.grid_shp.crs),
+ self.clip.shapefile.to_crs(self.grid_shp.crs), how='inner', op='intersects')
+
+ shp_airport_list = list(np.unique(airport_shapefile['airport_id'].values))
+
+ if conf_airport_list is not None:
+ shp_airport_list = list(set(conf_airport_list).intersection(shp_airport_list))
+
+ if len(shp_airport_list) == 0:
+ raise NameError("No airports intersect with the defined domain or the defined airport/s in the " +
+ "airport_list do not exist.")
+
+ airports_with_operations = np.unique(pd.read_csv(operations_file, usecols=['airport_id']).values)
+
+ new_list = list(set(shp_airport_list) & set(airports_with_operations))
+ if len(new_list) != len(shp_airport_list):
+ warn('{0} airports have no operations. Ignoring them.'.format(
+ list(set(shp_airport_list) - set(new_list))))
+
+ max_len = len(new_list)
+ # Only for master (rank == 0)
+ self.airport_list_full = new_list
+
+ new_list = [new_list[i * len(new_list) // self.comm.size: (i + 1) * len(new_list) // self.comm.size]
+ for i in range(self.comm.size)]
+ for sublist in new_list:
+ if len(sublist) == 0:
+ raise ValueError("ERROR: The selected number of processors is too high. " +
+ "The maximum number of processors accepted is {0} ".format(max_len) +
+ "(maximum number of airports included in the working domain).")
+ else:
+ new_list = None
+
+ new_list = self.comm.scatter(new_list, root=0)
+ self.logger.write_time_log('AviationSector', 'get_airport_list', timeit.default_timer() - spent_time)
+
+ return new_list
+
+ def calculate_airport_distribution(self, airport_shapefile):
+ """
+ Calculate the location and portion for the emissions that have to be distributed on the airport polygon.
+
+ It only needs to be calculated once; the second execution will read the auxiliary file already created.
+ All the emissions that have to be distributed on the airport polygon go to the surface layer.
+
+ :param airport_shapefile: Shapefile with the airport polygon geometries.
+ :type airport_shapefile: GeoDataFrame
+
+ :return: DataFrame with the location (FID) and fraction for each airport.
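The balanced split of the airport list across MPI ranks in `get_airport_list` follows the pattern sketched below; the ICAO codes are invented placeholders and no filtering against the grid or the operations file is done here.

```python
# Sketch of the balanced split and scatter of the airport list across MPI ranks.
from mpi4py import MPI

comm = MPI.COMM_WORLD
if comm.Get_rank() == 0:
    airports = ['LEBL', 'LEMD', 'LEZL', 'LEVC', 'LEAL']   # hypothetical ICAO codes
    size = comm.Get_size()
    chunks = [airports[i * len(airports) // size:(i + 1) * len(airports) // size]
              for i in range(size)]
else:
    chunks = None

my_airports = comm.scatter(chunks, root=0)   # each rank keeps only its own airports
```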
+ :rtype: DataFrame + """ + spent_time = timeit.default_timer() + self.logger.write_log('\t\tCalculating airport distribution', message_level=2) + airport_distribution_path = os.path.join(self.auxiliary_dir, 'aviation', 'airport_distribution.csv') + + if not os.path.exists(airport_distribution_path): + if self.comm.rank == 0: + airport_shapefile = airport_shapefile.loc[self.airport_list_full, :].copy() + if not os.path.exists(os.path.dirname(airport_distribution_path)): + os.makedirs(os.path.dirname(airport_distribution_path)) + airport_shapefile.to_crs(self.grid_shp.crs, inplace=True) + airport_shapefile['area'] = airport_shapefile.area + airport_distribution = self.spatial_overlays(airport_shapefile, self.grid_shp.reset_index(), + how='intersection') + airport_distribution['fraction'] = airport_distribution.area / airport_distribution['area'] + airport_distribution.drop(columns=['idx2', 'area', 'geometry', 'cons'], inplace=True) + airport_distribution.rename(columns={'idx1': 'airport_id'}, inplace=True) + airport_distribution['layer'] = 0 + airport_distribution.set_index(['airport_id', 'FID', 'layer'], inplace=True) + + airport_distribution.to_csv(airport_distribution_path) + else: + airport_distribution = None + airport_distribution = self.comm.bcast(airport_distribution, root=0) + else: + airport_distribution = pd.read_csv(airport_distribution_path) + airport_distribution.set_index(['airport_id', 'FID', 'layer'], inplace=True) + self.logger.write_time_log('AviationSector', 'calculate_airport_distribution', + timeit.default_timer() - spent_time) + + return airport_distribution + + def calculate_runway_distribution(self, runway_shapefile, phase_type): + """ + Calculate the location and portion for the emissions that have to be distributed on the runway lines. + + It only need to be calculated once. the second execution will read the auxiliary file already created. + All the emissions that have to be distributed on the runway line goes to the surface layer. + + :param runway_shapefile: Shapefile with the runway line geometries. + :type runway_shapefile: GeoDataFrame + + :param phase_type: Phase type to distribute. Arrival or Departure. + :type phase_type: str + + :return: DataFrame with the location (FID) and fraction for each airport. 
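`calculate_airport_distribution` (and the runway and trajectory variants after it) all rely on the same compute-once-and-cache idiom: the master builds the distribution, writes it to an auxiliary CSV and broadcasts it; later runs simply read the file. A minimal sketch with placeholder paths and data:

```python
# Sketch of the compute-once-and-cache idiom; the path and the distribution values
# are placeholders, and error handling is omitted.
import os
import pandas as pd
from mpi4py import MPI

comm = MPI.COMM_WORLD
aux_path = '/tmp/hermes_aux/airport_distribution.csv'      # hypothetical auxiliary file

if not os.path.exists(aux_path):
    if comm.Get_rank() == 0:
        if not os.path.exists(os.path.dirname(aux_path)):
            os.makedirs(os.path.dirname(aux_path))
        distribution = pd.DataFrame({'airport_id': ['LEBL'], 'FID': [42],
                                     'layer': [0], 'fraction': [1.0]})
        distribution.set_index(['airport_id', 'FID', 'layer'], inplace=True)
        distribution.to_csv(aux_path)                      # cache for the next run
    else:
        distribution = None
    distribution = comm.bcast(distribution, root=0)
else:
    distribution = pd.read_csv(aux_path)                   # later runs just read it
    distribution.set_index(['airport_id', 'FID', 'layer'], inplace=True)
```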
+ :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + def get_intersection_length(row): + intersection = row.get('geometry_x').intersection(row.get('geometry_y')) + return intersection.length + + def normalize(df): + total_fraction = df['{0}_f'.format(phase_type)].values.sum() + df['{0}_f'.format(phase_type)] = df['{0}_f'.format(phase_type)] / total_fraction + return df.loc[:, ['{0}_f'.format(phase_type)]] + + self.logger.write_log('\t\tCalculating runway distribution for {0}'.format(phase_type), message_level=2) + + runway_distribution_path = os.path.join( + self.auxiliary_dir, 'aviation', 'runway_{0}_distribution.csv'.format(phase_type)) + + if not os.path.exists(runway_distribution_path): + if self.comm.rank == 0: + runway_shapefile['{0}_f'.format(phase_type)] = runway_shapefile.groupby('airport_id').apply(normalize) + if not os.path.exists(os.path.dirname(runway_distribution_path)): + os.makedirs(os.path.dirname(runway_distribution_path)) + runway_shapefile.reset_index(inplace=True) + runway_shapefile.to_crs(self.grid_shp.crs, inplace=True) + runway_shapefile['length'] = runway_shapefile.length + # duplicating each runway by involved cell + runway_shapefile = gpd.sjoin(runway_shapefile, self.grid_shp.reset_index(), how="inner", + op='intersects') + # Adding cell geometry + runway_shapefile = runway_shapefile.merge(self.grid_shp.reset_index().loc[:, ['FID', 'geometry']], + on='FID', how='left') + # Intersection between line (roadway) and polygon (cell) + # runway_shapefile['geometry'] = runway_shapefile.apply(do_intersection, axis=1) + runway_shapefile['mini_length'] = runway_shapefile.apply(get_intersection_length, axis=1) + + runway_shapefile.drop(columns=['geometry_x', 'geometry_y', 'index_right'], inplace=True) + + runway_shapefile['fraction'] = runway_shapefile['{0}_f'.format(phase_type)].multiply( + runway_shapefile['mini_length'] / runway_shapefile['length']) + + runway_shapefile['layer'] = 0 + runway_shapefile = runway_shapefile[['airport_id', 'FID', 'layer', 'fraction']] + runway_shapefile = runway_shapefile.groupby(['airport_id', 'FID', 'layer']).sum() + # runway_shapefile.set_index(['airport_id', 'FID', 'layer'], inplace=True) + runway_shapefile.to_csv(runway_distribution_path) + else: + runway_shapefile = None + runway_shapefile = self.comm.bcast(runway_shapefile, root=0) + else: + runway_shapefile = pd.read_csv(runway_distribution_path) + runway_shapefile.set_index(['airport_id', 'FID', 'layer'], inplace=True) + self.logger.write_time_log('AviationSector', 'calculate_runway_distribution', + timeit.default_timer() - spent_time) + + return runway_shapefile + + def calculate_trajectories_distribution(self, airport_trajectories_shapefile, phase_type): + """ + Calculate the location and portion for the emissions that have to be distributed on the trajectories lines. + + It only need to be calculated once. the second execution will read the auxiliary file already created. + That emissions have to be distributed also vertically. + + :param airport_trajectories_shapefile: Shapefile with the trajectories information. + :type airport_trajectories_shapefile: GeoDataFrame + + :param phase_type: 'arrival' or 'departure' to indicate teh type of approach. + :type phase_type: str + + :return: DataFrame with the location (FID & level) and fraction for each airport. 
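The runway distribution computed above shares each runway's emissions among grid cells in proportion to the runway length that falls inside each cell. A pure-shapely toy example of that length fraction:

```python
# Toy example of the length-fraction split of a runway among grid cells (pure shapely).
from shapely.geometry import LineString, box

runway = LineString([(0.2, 0.5), (1.8, 0.5)])         # 1.6 units long
cells = {0: box(0, 0, 1, 1), 1: box(1, 0, 2, 1)}       # two unit grid cells (FID 0 and 1)

fractions = {fid: runway.intersection(cell).length / runway.length
             for fid, cell in cells.items()}
print(fractions)   # {0: 0.5, 1: 0.5}: half of the runway lies in each cell
```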
+ :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + def get_vertical_intersection_length(row): + circle = row.get('start_point').buffer(row.get('circle_radious')) + return row.get('src_geometry').intersection(circle).length + + def get_horizontal_intersection_length(row): + return row.get('geometry_x').intersection(row.get('geometry_y')).length + + def do_vertical_intersection(row): + circle = row.get('start_point').buffer(row.get('circle_radious')) + return row.get('src_geometry').intersection(circle) + + def do_horizontal_intersection(row): + return row.get('geometry_x').intersection(row.get('geometry_y')) + + def do_difference(row): + circle = row.get('start_point').buffer(row.get('circle_radious')) + return row.get('src_geometry').difference(circle) + + def normalize(df): + total_fraction = df['fraction'].values.sum() + df['fraction'] = df['fraction'] / total_fraction + return df.loc[:, ['fraction']] + + self.logger.write_log('\t\tCalculating trajectories distribution for {0}'.format(phase_type), message_level=2) + trajectories_distribution_path = os.path.join( + self.auxiliary_dir, 'aviation', 'trajectories_{0}_distribution.csv'.format(phase_type)) + + if not os.path.exists(trajectories_distribution_path): + if self.comm.rank == 0: + if not os.path.exists(os.path.dirname(trajectories_distribution_path)): + os.makedirs(os.path.dirname(trajectories_distribution_path)) + # Filtering shapefile + airport_trajectories_shapefile = airport_trajectories_shapefile.xs(phase_type, level='operation').copy() + airport_trajectories_shapefile = airport_trajectories_shapefile.loc[ + airport_trajectories_shapefile['airport_id'].isin( + self.airport_list_full), + :] + airport_trajectories_shapefile['fraction'] = airport_trajectories_shapefile.groupby('airport_id').apply( + normalize) + + # VERTICAL DISTRIBUTION + airport_trajectories_shapefile['length'] = airport_trajectories_shapefile['geometry'].length + trajectories_distr = [] + for level, v_lev in enumerate(self.vertical_levels): + dataframe = airport_trajectories_shapefile.copy() + dataframe.rename(columns={'geometry': 'src_geometry'}, inplace=True) + dataframe['layer'] = level + dataframe['circle_radious'] = (float(v_lev) / 1000.) 
* dataframe['length'] + dataframe['geometry'] = dataframe[['src_geometry', 'start_point', 'circle_radious']].apply( + do_vertical_intersection, axis=1) + trajectories_distr.append(dataframe[['airport_id', 'fraction', 'length', 'layer', 'geometry']]) + airport_trajectories_shapefile['geometry'] = dataframe[ + ['src_geometry', 'start_point', 'circle_radious']].apply(do_difference, axis=1) + if v_lev > 1000: + break + trajectories_distr = gpd.GeoDataFrame(pd.concat(trajectories_distr), geometry='geometry', + crs=airport_trajectories_shapefile.crs) + trajectories_distr.reset_index(inplace=True) + + # HORIZONTAL DISTRIBUTION + aux_grid = self.grid_shp.to_crs(trajectories_distr.crs).reset_index() + # trajectories_distr.to_crs(self.grid_shp.crs, inplace=True) + # duplicating each runway by involved cell + trajectories_distr = gpd.sjoin(trajectories_distr, aux_grid, how="inner", op='intersects') + # Adding cell geometry + trajectories_distr = trajectories_distr.merge(aux_grid.loc[:, ['FID', 'geometry']], on='FID', + how='left') + # Intersection between line (roadway) and polygon (cell) + trajectories_distr['geometry'] = trajectories_distr.apply(do_horizontal_intersection, axis=1) + trajectories_distr['mini_h_length'] = trajectories_distr.apply(get_horizontal_intersection_length, + axis=1) + trajectories_distr.drop(columns=['geometry_x', 'geometry_y', 'index_right'], inplace=True) + + trajectories_distr['fraction'] = trajectories_distr['fraction'].multiply( + trajectories_distr['mini_h_length'] / trajectories_distr['length']) + + trajectories_distr = trajectories_distr[['airport_id', 'FID', 'layer', 'fraction']] + trajectories_distr = trajectories_distr.groupby(['airport_id', 'FID', 'layer']).sum() + + trajectories_distr.to_csv(trajectories_distribution_path) + else: + trajectories_distr = None + trajectories_distr = self.comm.bcast(trajectories_distr, root=0) + else: + trajectories_distr = pd.read_csv(trajectories_distribution_path) + trajectories_distr.set_index(['airport_id', 'FID', 'layer'], inplace=True) + self.logger.write_time_log('AviationSector', 'calculate_trajectories_distribution', + timeit.default_timer() - spent_time) + + return trajectories_distr + + def get_main_engine_emission(self, phase): + """ + Calculate the main engine emissions for the given phase. + + :param phase: Phase to calculate. + :type phase: str + + :return: Dataframe with the emissions of the phase py airport. 
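The vertical slicing coded above assigns to each layer the piece of the climb-out (or approach) line that falls inside a circle around the runway start point, with a radius that scales with the layer top and the trajectory length; the remainder is handed to the next layer. A toy sketch with invented lengths and level tops:

```python
# Toy sketch of the vertical slicing: each layer keeps the part of the line inside a
# circle around the start point, the remainder is handed to the next layer.
from shapely.geometry import LineString, Point

trajectory = LineString([(0, 0), (10, 0)])   # invented climb-out path
start_point = Point(0, 0)                    # runway end where the climb starts
vertical_levels = [250., 500., 1000.]        # invented layer tops in metres

remaining = trajectory
for layer, v_lev in enumerate(vertical_levels):
    radius = (v_lev / 1000.) * trajectory.length   # same scaling as the sector code
    circle = start_point.buffer(radius)
    in_layer = remaining.intersection(circle)      # piece emitted in this layer
    remaining = remaining.difference(circle)       # left over for the upper layers
    print('layer {0}: length {1:.1f}'.format(layer, in_layer.length))
```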
+ :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + def get_e(df): + """ + Number of engines associated to each airport + """ + df['E'] = self.planes_info.loc[df.name, 'engine_n'] + return df.loc[:, ['E']] + + def get_t(df): + """ + Time spent by each aircraft to complete tha selected phase (s) + """ + plane_type = self.planes_info.loc[df.name[1], 'plane_type'] + df['t'] = self.times_info.loc[(df.name[0], plane_type), phase] + return df.loc[:, ['t']] + + def get_ef(df, poll): + """ + Emission factor associated to phase and pollutant + """ + engine = self.planes_info.loc[df.name, 'engine_id'] + # ef_dataframe = pd.read_csv(os.path.join(self.ef_dir, PHASE_EF_FILE[phase])) + ef_dataframe = self.ef_files[phase].reset_index() + ef_dataframe.set_index('engine_id', inplace=True) + df['EF'] = ef_dataframe.loc[engine, poll] + return df.loc[:, ['EF']] + + def get_n(df): + """ + Number of monthly operations associated to each aircraft, phase and month + """ + self.operations = self.operations.sort_index() + df['N'] = self.operations.loc[(df.name[0], df.name[1], PHASE_TYPE[phase]), df.name[2]] + return df.loc[:, ['N']] + + def get_wf(df): + """ + Daily factor associated to weekday and airport + """ + import datetime + date_np = df.head(1)['date'].values[0] + date = datetime.datetime.utcfromtimestamp(date_np.astype(int) * 1e-9) + profile = self.calculate_rebalanced_weekly_profile(self.weekly_profiles.loc[df.name[0], :].to_dict(), date) + for weekday in np.unique(df['weekday'].values): + df.loc[df['weekday'] == weekday, 'WF'] = profile[weekday] + return df.loc[:, ['WF']] + + def get_hf(df): + """ + Hourly factor associated to hour, + """ + operation = PHASE_TYPE[phase] + if df.name[2] > 4: + day_type = 'weekend' + else: + day_type = 'weekday' + df['HF'] = self.hourly_profiles.loc[(df.name[0], operation, day_type), df.name[1]] + return df.loc[:, ['HF']] + + # Merging operations with airport geometry + dataframe = pd.DataFrame(index=self.operations.xs(PHASE_TYPE[phase], level='operation').index) + + dataframe = dataframe.reset_index().set_index('airport_id') + dataframe = self.airport_shapefile.join(dataframe, how='inner') + dataframe.index.name = 'airport_id' + dataframe = dataframe.reset_index().set_index(['airport_id', 'plane_id']) + + dataframe['E'] = dataframe.groupby('plane_id').apply(get_e) + dataframe['t'] = dataframe.groupby(['airport_id', 'plane_id']).apply(get_t) + + # Dates + dataframe = self.add_dates(dataframe) + dataframe['month'] = dataframe['date'].dt.month + dataframe['weekday'] = dataframe['date'].dt.weekday + dataframe['hour'] = dataframe['date'].dt.hour + + dataframe['N'] = dataframe.groupby(['airport_id', 'plane_id', 'month']).apply(get_n) + dataframe['WF'] = dataframe.groupby(['airport_id', 'month']).apply(get_wf) + dataframe['HF'] = dataframe.groupby(['airport_id', 'hour', 'weekday']).apply(get_hf) + dataframe.drop(columns=['date', 'month', 'weekday', 'hour'], inplace=True) + + # Getting factor + dataframe['f'] = dataframe['E'] * dataframe['t'] * dataframe['N'] * dataframe['WF'] * dataframe['HF'] + dataframe.drop(columns=['E', 't', 'N', 'WF', 'HF'], inplace=True) + + for pollutant in self.source_pollutants: + if pollutant not in ['nmvoc', 'ch4']: + dataframe[pollutant] = dataframe.groupby('plane_id').apply(lambda x: get_ef(x, pollutant)) + dataframe[pollutant] = dataframe[pollutant] * dataframe['f'] + + dataframe.drop(columns=['f', 'plane_id', 'geometry'], inplace=True) + dataframe = dataframe.groupby(['airport_id', 'tstep']).sum() + 
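The main-engine emission per airport, plane and time step is therefore the product of the number of engines, the phase time, the monthly operations and the weekly and hourly factors, scaled by the emission factor. A toy numeric illustration, with every value invented:

```python
# Toy numeric illustration of the factor product f = E * t * N * WF * HF and
# emission = EF * f; all values below are invented.
engines_per_plane = 2        # E: engines of this plane
phase_time_s = 540.0         # t: seconds spent in the phase (e.g. taxi out)
monthly_operations = 300     # N: operations of this plane at this airport and month
weekly_factor = 1.1          # WF: weight of this weekday within the month
hourly_factor = 0.08         # HF: weight of this hour within the day
ef_nox = 0.9                 # EF: emission factor for this engine and phase

activity = engines_per_plane * phase_time_s * monthly_operations * weekly_factor * hourly_factor
emission_nox = ef_nox * activity
print('NOx emitted in this hour: {0:.1f}'.format(emission_nox))
```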
self.logger.write_time_log('AviationSector', 'get_main_engine_emission', timeit.default_timer() - spent_time) + + return dataframe + + def get_tyre_and_brake_wear_emission(self, phase): + """ + Calculate the tyre and brake wear emissions for the given phase. + + :param phase: Phase to calculate. + :type phase: str + + :return: Dataframe with the emissions of the phase py airport. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + def get_mtow(df): + """ + Maximum take-off weight associated to aircraft + """ + df['MTOW'] = self.planes_info.loc[df.name, 'mtow'] + return df.loc[:, ['MTOW']] + + def get_ef(poll): + """ + Emission factor associated to phase and pollutant + """ + # ef_dataframe = pd.read_csv(os.path.join(self.ef_dir, PHASE_EF_FILE[phase])) + ef_dataframe = self.ef_files[phase].reset_index() + ef_dataframe.set_index('plane_id', inplace=True) + ef = ef_dataframe.loc['default', poll] + return ef + + def get_n(df): + """ + Number of monthly operations associated to each aircraft, phase and month + """ + self.operations = self.operations.sort_index() + df['N'] = self.operations.loc[(df.name[0], df.name[1], PHASE_TYPE[phase]), df.name[2]] + return df.loc[:, ['N']] + + def get_wf(df): + """ + Daily factor associated to weekday and airport + """ + import datetime + date_np = df.head(1)['date'].values[0] + date = datetime.datetime.utcfromtimestamp(date_np.astype(int) * 1e-9) + profile = self.calculate_rebalanced_weekly_profile(self.weekly_profiles.loc[df.name[0], :].to_dict(), date) + for weekday in np.unique(df['weekday'].values): + df.loc[df['weekday'] == weekday, 'WF'] = profile[weekday] + return df.loc[:, ['WF']] + + def get_hf(df): + """ + Hourly factor associated to hour, + """ + operation = PHASE_TYPE[phase] + if df.name[2] > 4: + day_type = 'weekend' + else: + day_type = 'weekday' + df['HF'] = self.hourly_profiles.loc[(df.name[0], operation, day_type), df.name[1]] + return df.loc[:, ['HF']] + + # Merging operations with airport geometry + dataframe = pd.DataFrame(index=self.operations.xs(PHASE_TYPE[phase], level='operation').index) + dataframe = dataframe.reset_index().set_index('airport_id') + dataframe = self.airport_shapefile.join(dataframe, how='inner') + dataframe.index.name = 'airport_id' + dataframe = dataframe.reset_index().set_index(['airport_id', 'plane_id']) + + dataframe['MTOW'] = dataframe.groupby('plane_id').apply(get_mtow) + + # Dates + dataframe = self.add_dates(dataframe) + dataframe['month'] = dataframe['date'].dt.month + dataframe['weekday'] = dataframe['date'].dt.weekday + dataframe['hour'] = dataframe['date'].dt.hour + + dataframe['N'] = dataframe.groupby(['airport_id', 'plane_id', 'month']).apply(get_n) + dataframe['WF'] = dataframe.groupby(['airport_id', 'month']).apply(get_wf) + dataframe['HF'] = dataframe.groupby(['airport_id', 'hour', 'weekday']).apply(get_hf) + dataframe.drop(columns=['date', 'month', 'weekday', 'hour'], inplace=True) + + # Getting factor + dataframe['f'] = dataframe['MTOW'] * dataframe['N'] + + dataframe['f'] = dataframe['MTOW'] * dataframe['N'] * dataframe['WF'] * dataframe['HF'] + dataframe.drop(columns=['MTOW', 'N', 'WF', 'HF'], inplace=True) + + for pollutant in self.source_pollutants: + if pollutant in ['pm10', 'pm25']: + dataframe[pollutant] = get_ef(pollutant) + dataframe[pollutant] = dataframe[pollutant] * dataframe['f'] + + dataframe.drop(columns=['f', 'plane_id', 'geometry'], inplace=True) + dataframe = dataframe.groupby(['airport_id', 'tstep']).sum() + self.logger.write_time_log('AviationSector', 
'get_tyre_and_brake_wear_emission', + timeit.default_timer() - spent_time) + + return dataframe + + def get_auxiliary_power_unit_emission(self, phase): + """ + Calculate the auxiliary power unit (APU) emissions for the given phase. + + :param phase: Phase to calculate. + :type phase: str + + :return: Dataframe with the emissions of the phase py airport. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + def get_t(df): + """ + Time spent by each aircraft to complete tha selected phase (s) + """ + plane_type = self.planes_info.loc[df.name[1], 'plane_type'] + df['t'] = self.times_info.loc[(df.name[0], plane_type), phase] + return df.loc[:, ['t']] + + def get_ef(df, poll): + """ + Emission factor associated to phase and pollutant + """ + engine = self.planes_info.loc[df.name, 'apu_id'] + # ef_dataframe = pd.read_csv(os.path.join(self.ef_dir, PHASE_EF_FILE[phase])) + ef_dataframe = self.ef_files[phase].reset_index() + ef_dataframe.set_index('apu_id', inplace=True) + try: + df['EF'] = ef_dataframe.loc[engine, poll] + except (TypeError, KeyError): + # Occurs when the plane has not APU + df['EF'] = 0 + return df.loc[:, ['EF']] + + def get_n(df): + """ + Number of monthly operations associated to each aircraft, phase and month + """ + self.operations = self.operations.sort_index() + df['N'] = self.operations.loc[(df.name[0], df.name[1], PHASE_TYPE[phase]), df.name[2]] + return df.loc[:, ['N']] + + def get_wf(df): + """ + Daily factor associated to weekday and airport + """ + import datetime + date_np = df.head(1)['date'].values[0] + date = datetime.datetime.utcfromtimestamp(date_np.astype(int) * 1e-9) + profile = self.calculate_rebalanced_weekly_profile(self.weekly_profiles.loc[df.name[0], :].to_dict(), date) + for weekday in np.unique(df['weekday'].values): + df.loc[df['weekday'] == weekday, 'WF'] = profile[weekday] + return df.loc[:, ['WF']] + + def get_hf(df): + """ + Hourly factor associated to hour, + """ + operation = PHASE_TYPE[phase] + if df.name[2] > 4: + day_type = 'weekend' + else: + day_type = 'weekday' + df['HF'] = self.hourly_profiles.loc[(df.name[0], operation, day_type), df.name[1]] + return df.loc[:, ['HF']] + + # Merging operations with airport geometry + dataframe = pd.DataFrame(index=self.operations.xs(PHASE_TYPE[phase], level='operation').index) + dataframe = dataframe.reset_index().set_index('airport_id') + dataframe = self.airport_shapefile.join(dataframe, how='inner') + dataframe.index.name = 'airport_id' + dataframe = dataframe.reset_index().set_index(['airport_id', 'plane_id']) + + dataframe['t'] = dataframe.groupby(['airport_id', 'plane_id']).apply(get_t) + + # Dates + dataframe = self.add_dates(dataframe) + dataframe['month'] = dataframe['date'].dt.month + dataframe['weekday'] = dataframe['date'].dt.weekday + dataframe['hour'] = dataframe['date'].dt.hour + + dataframe['N'] = dataframe.groupby(['airport_id', 'plane_id', 'month']).apply(get_n) + dataframe['WF'] = dataframe.groupby(['airport_id', 'month']).apply(get_wf) + dataframe['HF'] = dataframe.groupby(['airport_id', 'hour', 'weekday']).apply(get_hf) + dataframe.drop(columns=['date', 'month', 'weekday', 'hour'], inplace=True) + + # Getting factor + dataframe['f'] = dataframe['t'] * dataframe['N'] * dataframe['WF'] * dataframe['HF'] + dataframe.drop(columns=['t', 'N', 'WF', 'HF'], inplace=True) + + for pollutant in self.source_pollutants: + if pollutant not in ['nmvoc', 'ch4']: + dataframe[pollutant] = dataframe.groupby('plane_id').apply(lambda x: get_ef(x, pollutant)) + dataframe[pollutant] = 
dataframe[pollutant] * dataframe['f'] + + dataframe.drop(columns=['f', 'plane_id', 'geometry'], inplace=True) + dataframe = dataframe.groupby(['airport_id', 'tstep']).sum() + self.logger.write_time_log('AviationSector', 'get_auxiliary_power_unit_emission', + timeit.default_timer() - spent_time) + + return dataframe + + def distribute(self, dataframe, distribution): + """ + Distributes the airport emissions by the given distribution. + + :param dataframe: Emissions by airport. + :type dataframe: DataFrame + + :param distribution: Involved cells by airport. + :type distribution: DataFrame + + :return: Emissions distributed by cell (FID) + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + pollutants = dataframe.columns.values + dataframe.reset_index(inplace=True) + distribution.reset_index(inplace=True) + + dataframe = dataframe.merge(distribution, on='airport_id') + + dataframe[pollutants] = dataframe[pollutants].multiply(dataframe['fraction'], axis=0) + dataframe.drop(columns=['airport_id', 'fraction'], inplace=True) + dataframe = dataframe.groupby(['FID', 'layer', 'tstep']).sum() + self.logger.write_time_log('AviationSector', 'distribute', timeit.default_timer() - spent_time) + + return dataframe + + def calculate_emissions(self): + """ + Main function to calculate the emissions for the Landing and take off airport emissions. + + :return: Airport emissions distributed by cell (FID), layer and time step. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + self.logger.write_log('\tCalculating emissions') + + taxi_out = self.get_main_engine_emission('taxi_out') + self.logger.write_log('\t\tTaxi out emissions calculated.', message_level=2) + taxi_in = self.get_main_engine_emission('taxi_in') + self.logger.write_log('\t\tTaxi in emissions calculated.', message_level=2) + takeoff = self.get_main_engine_emission('takeoff') + self.logger.write_log('\t\tTake off emissions calculated.', message_level=2) + climbout = self.get_main_engine_emission('climbout') + self.logger.write_log('\t\tClimb out emissions calculated.', message_level=2) + approach = self.get_main_engine_emission('approach') + self.logger.write_log('\t\tApproach emissions calculated.', message_level=2) + landing = self.get_main_engine_emission('landing') + self.logger.write_log('\t\tLanding emissions calculated.', message_level=2) + + landing_wear = self.get_tyre_and_brake_wear_emission('landing_wear') + self.logger.write_log('\t\tLanding wear emissions calculated.', message_level=2) + + post_taxi_in = self.get_auxiliary_power_unit_emission('post-taxi_in') + self.logger.write_log('\t\tPost taxi in emissions calculated.', message_level=2) + pre_taxi_out = self.get_auxiliary_power_unit_emission('pre-taxi_out') + self.logger.write_log('\t\tPre taxi out emissions calculated.', message_level=2) + + airport_emissions = pd.concat([pre_taxi_out, taxi_out, taxi_in, post_taxi_in]) + airport_emissions = airport_emissions.groupby(['airport_id', 'tstep']).sum() + airport_emissions = self.distribute(airport_emissions, self.airport_distribution) + self.logger.write_log('\t\tAirport emissions distributed (pre_taxi_out, taxi_out, taxi_in, post_taxi_in)', + message_level=2) + + runway_departure_emissions = self.distribute(takeoff, self.runway_departure_distribution) + runway_arrival_emissions = self.distribute(landing, self.runway_arrival_distribution) + runway_arrival_emissions_wear = self.distribute(landing_wear, self.runway_arrival_distribution) + self.logger.write_log('\t\tRunway emissions distributed (takeoff, landing, 
landing_wear)', message_level=2) + + trajectory_arrival_emissions = self.distribute(approach, self.trajectory_arrival_distribution) + trajectory_departure_emisions = self.distribute(climbout, self.trajectory_departure_distribution) + self.logger.write_log('\t\tTrajectory emissions distributed (approach, climb out)', message_level=2) + + emissions = pd.concat([airport_emissions, runway_departure_emissions, trajectory_arrival_emissions, + trajectory_departure_emisions, runway_arrival_emissions]) + + emissions = emissions.groupby(['FID', 'layer', 'tstep']).sum() + runway_arrival_emissions_wear = runway_arrival_emissions_wear.groupby(['FID', 'layer', 'tstep']).sum() + + if 'hc' in self.source_pollutants: # After Olivier (1991) + emissions['nmvoc'] = 0.9 * emissions['hc'] + emissions['ch4'] = 0.1 * emissions['hc'] + + # Speceiation + runway_arrival_emissions_wear = self.speciate(runway_arrival_emissions_wear, 'landing_wear') + emissions = self.speciate(emissions, 'default') + + emissions = pd.concat([emissions, runway_arrival_emissions_wear]) + emissions = emissions[(emissions.T != 0).any()] + emissions = emissions.groupby(['FID', 'layer', 'tstep']).sum() + + # From kmol/h or kg/h to mol/h or g/h + emissions = emissions * 1000 + self.logger.write_log('\t\tAviation emissions calculated', message_level=2) + self.logger.write_time_log('AviationSector', 'calculate_emissions', timeit.default_timer() - spent_time) + return emissions diff --git a/hermesv3_bu/sectors/livestock_sector.py b/hermesv3_bu/sectors/livestock_sector.py new file mode 100755 index 0000000000000000000000000000000000000000..1d403cfe5f4abfd17a9f4e80d30938240633bb1a --- /dev/null +++ b/hermesv3_bu/sectors/livestock_sector.py @@ -0,0 +1,1044 @@ +#!/usr/bin/env python + +import os +import numpy as np +import pandas as pd +import geopandas as gpd +import timeit +from hermesv3_bu.logger.log import Log +from warnings import warn + +from hermesv3_bu.sectors.sector import Sector +from hermesv3_bu.io_server.io_shapefile import IoShapefile +from hermesv3_bu.io_server.io_raster import IoRaster +from hermesv3_bu.io_server.io_netcdf import IoNetcdf + +# Constants for grassing daily factor estimation +SIGMA = 60 +TAU = 170 + + +class LivestockSector(Sector): + """ + Class that contains all the information and methods to calculate the livestock emissions. + """ + def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + animal_list, gridded_livestock_path, correction_split_factors_path, temperature_dir, wind_speed_dir, + denominator_yearly_factor_dir, ef_dir, monthly_profiles_path, weekly_profiles_path, + hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path, + nut_shapefile_path): + """ + :param comm: MPI Communicator + + :param logger: Logger + :type logger: Log + + :param auxiliary_dir: Path to the directory where the necessary auxiliary files will be created if them are + not created yet. + :type auxiliary_dir: str + + :param source_pollutants: List of input pollutants to take into account. Agricultural livestock module can + calculate emissions derived from the next source pollutants: NH3, NOx expressed as NO, NMVOC, PM10 and + PM2.5 + ['nox_no', 'nh3', 'nmvoc', 'pm10', 'pm25'] + :type source_pollutants: list + + :param grid_shp: Shapefile that contains the destination grid. It must contains the 'FID' (cell num). + :type grid_shp: GeoPandas.GeoDataframe + + :param clip: Clip. 
+ :type clip: Clip + + :param animal_list: List of animals to take into account. + :type animal_list: list + + :param gridded_livestock_path: Path to the Raster that contains the animal distribution. + '' will be replaced by each animal of the animal list. + :type gridded_livestock_path: str + + :param correction_split_factors_path: Path to the CSV file that contains the correction factors and the + splitting factors to discretizise each animal into theirs different animal types. + '' will be replaced by each animal of the animal list. + The CSV file must contain the following columns ["NUT", "nut_code", "_fact", "_01", + ...] + "nut_code" column must contain the NUT ID + :type correction_split_factors_path: str + + :param date_array: List of datetimes. + :type date_array: list(datetime.datetime, ...) + + :param temperature_dir: Path to the directory that contains the needed temperature files. The temperature + file names have to follow the 'tas_.nc' convention where YYYY is the year and MM the month. + (e.g. 'tas_201601.nc') + That NetCDF file have to contain: + - 'time', 'longitude' and 'latitude' dimensions. + - As many times as days of the month. + - 'latitude' variable + - 'longitude' variable + - 'tas' variable (time, latitude, longitude), 2m temperature, Kelvins + :type temperature_dir: str + + :param wind_speed_dir: Path to the directory that contains the needed wind speed files. The wind speed file + names have to follow the 'sfcWind_.nc' convention where YYYY is the year and MM the month. + (e.g. 'scfWind_201601.nc') + That NetCDF file have to contain: + - 'time', 'longitude' and 'latitude' dimensions. + - As many times as days of the month. + - 'latitude' variable + - 'longitude' variable + - 'sfcWind' variable (time, latitude, longitude), 10 m wind speed, m/s + :type wind_speed_dir: str + + :param denominator_yearly_factor_dir: Path to the directory that contains the needed denominator files. + The denominator file names have to follow the 'grassing_.nc' convention where YYYY is the year. + Have to contains grassing, house_closed, house_open and storage denominators files. + (e.g. 'grassing_2016.nc') + That NetCDF file have to contain: + - 'time', 'longitude' and 'latitude' dimensions. + - One time value + - 'latitude' variable + - 'longitude' variable + - 'FD' variable (time, latitude, longitude) + :type denominator_yearly_factor_dir: str + + :param ef_dir: Path to the CSV file that contains all the information to calculate the emissions for each + input pollutant. + - PM10 (pm10) & PM2.5 (pm25) use the same emission factor file 'pm.csv' with the following columns + ["Code", "Xhousing", "EF_pm10", "EF_pm25"] + - NH3 'nh3.csv' with the following columns ["Code", "Nex", "Xtan", "Xhousing", "Xyards", "Xgraz", + "Xslurry", "Xsolid", "EF_hous_slurry", "EF_hous_solid", "EF_yard", "f_imm", "m_bedding_N", + "x_store_slurry", "x_store_FYM", "f_min", "EF_storage_slurry_NH3", "EF_storage_slurry_N20", + "EF_storage_slurry_NO", "EF_storage_slurry_N2", "EF_storage_solid_NH3", "EF_storage_solid_N2O", + "EF_storage_solid_NO", "EF_storage_solid_N2", "EF_graz"] + - NMVOC 'nmvoc.csv' with the following columns ["Code", "Xhousing", "EF_nmvoc"] + - NOx 'nox_no.csv' with the following columns [Code, Nex, Xtan, Xhousing, Xyards, Xgraz, Xslurry, + Xsolid, EF_hous_slurry, EF_hous_solid, EF_yard, f_imm, m_bedding_N, x_store_slurry, x_store_FYM, + f_min, EF_storage_slurry_NO, EF_storage_solid_NO] + Each csv file have to contain as many rows as animal types with their codes on the "Code" column. 
+ :type ef_dir: str + + :param monthly_profiles_path: Path to the CSV file that contains all the monthly profiles. The CSV file must + contain the following columns [P_month, January, February, ..., November, December] + The P_month code have to be the input pollutant. + :type monthly_profiles_path: str + + :param weekly_profiles_path: Path to the CSV file that contains all the weekly profiles. The CSV file must + contain the following columns [P_week, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday] + The P_week code have to be the input pollutant. + :type weekly_profiles_path: str + + :param hourly_profiles_path: Path to the CSV file that contains all the hourly profiles. The CSV file must + contain the following columns [P_hour, 0, 1, 2, 3, ..., 22, 23] + The P_week code have to be the input pollutant. + :type hourly_profiles_path: str + + :param speciation_map_path: Path to the CSV file that contains the speciation map. The CSV file must contain + the following columns [dst, src, description] + The 'dst' column will be used as output pollutant list and the 'src' column as their onw input pollutant + to be used as a fraction in the speciation profiles. + :type speciation_map_path: str + + :param speciation_profiles_path: Path to the file that contains all the speciation profiles. The CSV file + must contain the "Code" column with the value of each animal of the animal_list. The rest of columns + have to be the sames as the column 'dst' of the 'speciation_map_path' file. + :type speciation_profiles_path: str + + :param molecular_weights_path: Path to the CSV file that contains all the molecular weights needed. The CSV + file must contain the 'Specie' and 'MW' columns. + :type molecular_weights_path: str + + :param nut_shapefile_path: Path to the shapefile that contain the NUT polygons. The shapefile must contain + the 'ORDER07' information with the NUT_code. + :type nut_shapefile_path: str + """ + spent_time = timeit.default_timer() + logger.write_log('===== LIVESTOCK SECTOR =====') + super(LivestockSector, self).__init__( + comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path) + + # Common + self.animal_list = animal_list + self.day_dict = self.calculate_num_days() + + # Paths + self.ef_dir = ef_dir + self.paths = { + 'temperature_dir': temperature_dir, + 'wind_speed_dir': wind_speed_dir, + 'denominator_dir': denominator_yearly_factor_dir, + 'ef_dir': ef_dir, + } + + # Creating dst resolution shapefile with the amount of animals + self.animals_df = self.create_animals_distribution(gridded_livestock_path, nut_shapefile_path, + correction_split_factors_path) + self.logger.write_time_log('LivestockSector', '__init__', timeit.default_timer() - spent_time) + + def create_animals_distribution(self, gridded_livestock_path, nut_shapefile_path, correction_split_factors_path): + """ + Get and distribute the animal distribution between the MPI process. + + The creation of the shapefile belong to the master process. + + :param gridded_livestock_path: Path to the Raster (TIFF) that contains the animal distribution. + '' will be replaced by each animal of the animal list. + :type gridded_livestock_path: str + + :param nut_shapefile_path: Path to the shapefile that contain the NUT polygons. The shapefile must contain + the 'ORDER07' information with the NUT ID. 
+ :type nut_shapefile_path: str
+
+ :param correction_split_factors_path: Path to the CSV file that contains the correction factors and the
+ splitting factors to discretize each animal into their different animal types.
+ '' will be replaced by each animal of the animal list.
+
+ The CSV file must contain the following columns ["NUT", "nut_code", "_fact", "_01", ...]
+ "nut_code" column must contain the NUT ID.
+ :type correction_split_factors_path: str
+
+ :return: GeoDataframe with the amount of each animal subtype by destiny cell (FID)
+ Columns:
+ 'FID', 'cattle_01', 'cattle_02', 'cattle_03', 'cattle_04', 'cattle_05', 'cattle_06', 'cattle_07',
+ 'cattle_08', 'cattle_09', 'cattle_10', 'cattle_11', 'chicken_01', 'chicken_02', 'goats_01', 'goats_02',
+ 'goats_03', 'goats_04', 'goats_05', 'goats_06', 'pigs_01', 'pigs_02', 'pigs_03', 'pigs_04', 'pigs_05',
+ 'pigs_06', 'pigs_07', 'pigs_08', 'pigs_09', 'pigs_10', 'timezone', 'geometry'
+ :rtype: geopandas.GeoDataframe
+ """
+ spent_time = timeit.default_timer()
+ self.logger.write_log('\tCreating animal distribution', message_level=2)
+ # Work for master MPI process
+ if self.comm.Get_rank() == 0:
+ animals_df = self.create_animals_shapefile(gridded_livestock_path)
+ animals_df = self.animal_distribution_by_category(animals_df, nut_shapefile_path,
+ correction_split_factors_path)
+ else:
+ animals_df = None
+
+ # Split the distribution, in a balanced way, between the MPI processes
+ animals_df = IoShapefile(self.comm).split_shapefile(animals_df)
+ self.logger.write_log('Animal distribution done', message_level=2)
+ self.logger.write_time_log('LivestockSector', 'create_animals_distribution',
+ timeit.default_timer() - spent_time)
+
+ return animals_df
+
+ def calculate_num_days(self):
+ """
+ Create a dictionary with the day as key and the number of hours as value.
+
+ :return: Dictionary with the day as key and the number of hours as value.
+ :rtype: dict
+ """
+ spent_time = timeit.default_timer()
+ day_array = [hour.date() for hour in self.date_array]
+ days, num_days = np.unique(day_array, return_counts=True)
+
+ day_dict = {}
+ for key, value in zip(days, num_days):
+ day_dict[key] = value
+ self.logger.write_time_log('LivestockSector', 'calculate_num_days', timeit.default_timer() - spent_time)
+
+ return day_dict
+
+ def create_animals_shapefile_src_resolution(self, gridded_livestock_path):
+ """
+ Create the animal shapefile in the same resolution as the raster.
+
+ It will return a complete shapefile with the amount of animals of the animal list.
+ That shapefile will contain as many columns as animal types of the list and the 'CELL_ID' as index.
+
+ Each one of the animal distributions will be stored separately in folders following the example path
+ /livestock/animal_distribution//.shp
+
+ The clipped raster (TIFF) will also be created, following the example path
+ /livestock/animal_distribution//_clip.tiff
+
+ :param gridded_livestock_path: Path to the Raster (TIFF) that contains the animal distribution.
+ '' will be replaced by each animal of the animal list.
+ :type gridded_livestock_path: str
+
+ :return: Shapefile with the amount of each animal of the animal list in the source resolution.
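`calculate_num_days` simply counts how many of the run's hourly timestamps fall on each calendar day; a self-contained sketch with a toy 25-hour run:

```python
# Sketch of calculate_num_days with a toy 25-hour run starting at midnight.
from datetime import datetime, timedelta
import numpy as np

date_array = [datetime(2016, 11, 29) + timedelta(hours=h) for h in range(25)]
days, num_hours = np.unique([d.date() for d in date_array], return_counts=True)
day_dict = dict(zip(days, num_hours))
print(day_dict)   # {2016-11-29: 24, 2016-11-30: 1}
```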
+ :rtype: geopandas.GeoDataframe + """ + spent_time = timeit.default_timer() + self.logger.write_log('\t\tCreating animal shapefile into source resolution', message_level=3) + animal_distribution = None + # For each one of the animals of the animal list + for animal in self.animal_list: + self.logger.write_log('\t\t\t {0}'.format(animal), message_level=3) + # Each one of the animal distributions will be stored separately + animal_distribution_path = os.path.join(self.auxiliary_dir, 'livestock', 'animal_distribution', animal, + '{0}.shp'.format(animal)) + if not os.path.exists(animal_distribution_path): + # Create clipped raster file + clipped_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly( + gridded_livestock_path.replace('', animal), self.clip.shapefile, + os.path.join(self.auxiliary_dir, 'livestock', 'animal_distribution', animal, + '{0}_clip.tiff'.format(animal))) + + animal_df = IoRaster(self.comm).to_shapefile_serie(clipped_raster_path, animal_distribution_path, + write=True) + else: + animal_df = IoShapefile(self.comm).read_shapefile_serial(animal_distribution_path) + + animal_df.rename(columns={'data': animal}, inplace=True) + animal_df.set_index('CELL_ID', inplace=True) + + # Creating full animal shapefile + if animal_distribution is None: + # First animal type of the list + animal_distribution = animal_df + else: + # Adding new animal distribution values + animal_distribution = pd.concat([animal_distribution, animal_df.loc[:, animal]], axis=1) + # Adding new cell geometries that have not appear in the previous animals + animal_distribution['geometry'] = animal_distribution['geometry'].fillna(animal_df['geometry']) + + # Removing empty data + animal_distribution = animal_distribution.loc[(animal_distribution[self.animal_list] != 0).any(axis=1), :] + self.logger.write_time_log('LivestockSector', 'create_animals_shapefile_src_resolution', + timeit.default_timer() - spent_time) + + return animal_distribution + + def animals_shapefile_to_dst_resolution(self, animal_distribution): + """ + Interpolates the source distribution into the destiny grid. + + :param animal_distribution: Animal distribution shapefile in the source resolution. + :type animal_distribution: geopandas.GeoDataframe + + :return: Animal distribution shapefile in the destiny resolution. 
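Stacking the per-animal columns into a single table keyed by the source cell, and dropping cells where every count is zero, as done at the end of `create_animals_shapefile_src_resolution`, can be sketched with plain pandas and toy numbers:

```python
# Toy example of stacking per-animal columns keyed by source cell and dropping
# cells where every animal count is zero.
import pandas as pd

cattle = pd.DataFrame({'cattle': [5.0, 0.0]}, index=pd.Index([10, 11], name='CELL_ID'))
pigs = pd.DataFrame({'pigs': [0.0, 0.0]}, index=pd.Index([10, 11], name='CELL_ID'))

animals = pd.concat([cattle, pigs], axis=1)
animals = animals.loc[(animals[['cattle', 'pigs']] != 0).any(axis=1), :]
print(animals)   # only CELL_ID 10 survives
```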
+ :rtype: geopandas.GeoDataframe + """ + spent_time = timeit.default_timer() + self.logger.write_log('\t\tCreating animal shapefile into destiny resolution', message_level=3) + self.grid_shp.reset_index(inplace=True) + # Changing coordinates system to the grid one + animal_distribution.to_crs(self.grid_shp.crs, inplace=True) + # Getting src area + animal_distribution['src_inter_fraction'] = animal_distribution.geometry.area + + # Making the intersection between the src distribution and the destiny grid + animal_distribution = self.spatial_overlays(animal_distribution, self.grid_shp, how='intersection') + # Getting proportion of intersection in the src cell (src_area/portion_area) + animal_distribution['src_inter_fraction'] = \ + animal_distribution.geometry.area / animal_distribution['src_inter_fraction'] + # Applying proportion to src distribution + animal_distribution[self.animal_list] = animal_distribution.loc[:, self.animal_list].multiply( + animal_distribution["src_inter_fraction"], axis="index") + # Sum by destiny cell + animal_distribution = animal_distribution.loc[:, self.animal_list + ['FID']].groupby('FID').sum() + + self.grid_shp.set_index('FID', drop=False, inplace=True) + # Adding geometry and coordinates system from the destiny grid shapefile + animal_distribution = gpd.GeoDataFrame(animal_distribution, crs=self.grid_shp.crs, + geometry=self.grid_shp.loc[animal_distribution.index, 'geometry']) + animal_distribution.reset_index(inplace=True) + self.logger.write_time_log('LivestockSector', 'animals_shapefile_to_dst_resolution', + timeit.default_timer() - spent_time) + + return animal_distribution + + def create_animals_shapefile(self, gridded_livestock_path): + """ + Create the animal distribution shapefile into the destiny resolution grid. + + That auxiliary file will be stored in '/livestock/animal_distribution/animal_distribution.shp' + path. + + Work done on master only once. In the rest of simulations, master will read the work done previously. + + :param gridded_livestock_path: Path to the Raster (TIFF) that contains the animal distribution. + '' will be replaced by each animal of the animal list. + :type gridded_livestock_path: str + + :return: + """ + spent_time = timeit.default_timer() + animal_distribution_path = os.path.join(self.auxiliary_dir, 'livestock', 'animal_distribution', + 'animal_distribution.shp') + + if not os.path.exists(animal_distribution_path): + dataframe = self.create_animals_shapefile_src_resolution(gridded_livestock_path) + dataframe = self.animals_shapefile_to_dst_resolution(dataframe) + IoShapefile(self.comm).write_shapefile_serial(dataframe, animal_distribution_path) + else: + dataframe = IoShapefile(self.comm).read_shapefile_serial(animal_distribution_path) + self.logger.write_time_log('LivestockSector', 'create_animals_shapefile', timeit.default_timer() - spent_time) + + return dataframe + + def get_splitting_factors(self, correction_split_factors_path): + """ + Gather all the splitting factors for each sub animal. + + It will multiply each splitting factor by their correction factor. + The result dataframe have to have the nut_code column and all the animal subtype percentages. + + :param correction_split_factors_path: Path to the CSV file that contains the correction factors and the + splitting factors to discretizise each animal into theirs different animal types. + '' will be replaced by each animal of the animal list. + + The CSV file must contain the following columns ["NUT", "nut_code", "_fact", "_01", ...] 
+ "nut_code" column must contain the NUT ID. + :type correction_split_factors_path: str + + :return: Dataframe with the nut_code column and all the animal subtype percentages. + :rtype : pandas.Dataframe + """ + spent_time = timeit.default_timer() + splitting_factors_list = [] + for animal in self.animal_list: + correction_split_factors = pd.read_csv(correction_split_factors_path.replace('', animal)) + correction_split_factors.set_index('nut_code', inplace=True) + + categories = list(correction_split_factors.columns.values) + categories = [e for e in categories if e not in ['NUT', 'nut_code', '{0}_fact'.format(animal)]] + + correction_split_factors[categories] = correction_split_factors.loc[:, categories].multiply( + correction_split_factors['{0}_fact'.format(animal)], axis='index') + correction_split_factors.drop(columns=['NUT', '{0}_fact'.format(animal)], inplace=True) + splitting_factors_list.append(correction_split_factors) + splitting_factors = pd.concat(splitting_factors_list, axis=1) + + splitting_factors.reset_index(inplace=True) + splitting_factors['nut_code'] = splitting_factors['nut_code'].astype(np.int16) + self.logger.write_time_log('LivestockSector', 'get_splitting_factors', timeit.default_timer() - spent_time) + + return splitting_factors + + def animal_distribution_by_category(self, dataframe, nut_shapefile_path, correction_split_factors_path): + """ + Split the animal categories into as many categories as each animal type has. + + :param dataframe: GeoDataframe with the animal distribution by animal type. + :type dataframe: geopandas.GeoDataframe + + :param nut_shapefile_path: Path to the shapefile that contain the NUT polygons. The shapefile must contain + the 'ORDER07' information with the NUT_code. + :type nut_shapefile_path: str + + :param correction_split_factors_path: Path to the CSV file that contains the correction factors and the + splitting factors to discretizise each animal into theirs different animal types. + '' will be replaced by each animal of the animal list. + The CSV file must contain the following columns ["NUT", "nut_code", "_fact", "_01", + ...] 
+ "nut_code" column must contain the NUT ID + :type correction_split_factors_path: str + + :return: GeoDataframe with the amount of each animal subtype by destiny cell (FID) + Columns: + 'FID', 'cattle_01', 'cattle_02', 'cattle_03' 'cattle_04', 'cattle_05', 'cattle_06', 'cattle_07', + 'cattle_08', 'cattle_09', 'cattle_10', 'cattle_11', 'chicken_01', 'chicken_02', 'goats_01', 'goats_02', + 'goats_03', goats_04', 'goats_05', 'goats_06', 'pigs_01', 'pigs_02', 'pigs_03', 'pigs_04', 'pigs_05', + 'pigs_06', 'pigs_07', 'pigs_08', 'pigs_09', 'pigs_10', 'timezone', 'geometry' + :rtype: geopandas.GeoDataframe + """ + spent_time = timeit.default_timer() + animal_distribution_path = os.path.join(self.auxiliary_dir, 'livestock', 'animal_distribution', + 'animal_distribution_by_cat.shp') + + if not os.path.exists(animal_distribution_path): + dataframe = self.add_nut_code(dataframe, nut_shapefile_path, nut_value='ORDER07') + + splitting_factors = self.get_splitting_factors(correction_split_factors_path) + + # Adding the splitting factors by NUT code + dataframe = pd.merge(dataframe, splitting_factors, how='left', on='nut_code') + + dataframe.drop(columns=['nut_code'], inplace=True) + + for animal in self.animal_list: + animal_types = [i for i in list(dataframe.columns.values) if i.startswith(animal)] + dataframe.loc[:, animal_types] = dataframe.loc[:, animal_types].multiply(dataframe[animal], + axis='index') + dataframe.drop(columns=[animal], inplace=True) + + dataframe = self.add_timezone(dataframe) + IoShapefile(self.comm).write_shapefile_serial(dataframe, animal_distribution_path) + else: + dataframe = IoShapefile(self.comm).read_shapefile_serial(animal_distribution_path) + self.logger.write_time_log('LivestockSector', 'animal_distribution_by_category', + timeit.default_timer() - spent_time) + + return dataframe + + def get_daily_factors(self, animal_shp, day): + """ + Calculate the daily factors necessaries. + + This function returns a shapefile with the following columns: + - 'REC': ID number of the destiny cell. + - 'FD_housing_open': Daily factor for housing open emissions. + - 'FD_housing_closed': Daily factor for housing close emissions. + - 'FD_storage': Daily factor for storage emissions. + - 'FD_grassing': Daily factor for grassing emissions. + - 'geometry': Destiny cell geometry + + :param animal_shp: GeoDataframe with the amount of each animal subtype by destiny cell (FID) + Columns: + 'FID', 'cattle_01', 'cattle_02', 'cattle_03' 'cattle_04', 'cattle_05', 'cattle_06', 'cattle_07', + 'cattle_08', 'cattle_09', 'cattle_10', 'cattle_11', 'chicken_01', 'chicken_02', 'goats_01', 'goats_02', + 'goats_03', goats_04', 'goats_05', 'goats_06', 'pigs_01', 'pigs_02', 'pigs_03', 'pigs_04', 'pigs_05', + 'pigs_06', 'pigs_07', 'pigs_08', 'pigs_09', 'pigs_10', 'timezone', 'geometry' + :type animal_shp: geopandas.GeoDataframe + + :param day: Date of the day to generate. + :type day: datetime.date + + :return: Shapefile with the daily factors. 
+        :rtype: geopandas.GeoDataframe
+        """
+        import math
+        spent_time = timeit.default_timer()
+        # Extract the points where we want meteorological parameters
+        geometry_shp = animal_shp.loc[:, ['FID', 'geometry']].to_crs({'init': 'epsg:4326'})
+        geometry_shp['c_lat'] = geometry_shp.centroid.y
+        geometry_shp['c_lon'] = geometry_shp.centroid.x
+        geometry_shp['centroid'] = geometry_shp.centroid
+        geometry_shp.drop(columns='geometry', inplace=True)
+
+        # Extracting temperature
+        meteo = IoNetcdf(self.comm).get_data_from_netcdf(
+            os.path.join(self.paths['temperature_dir'], 'tas_{0}{1}.nc'.format(day.year, str(day.month).zfill(2))),
+            'tas', 'daily', day, geometry_shp)
+        meteo['tas'] = meteo['tas'] - 273.15  # From Kelvin to Celsius degrees
+        # Extracting wind speed
+        meteo['sfcWind'] = IoNetcdf(self.comm).get_data_from_netcdf(
+            os.path.join(self.paths['wind_speed_dir'], 'sfcWind_{0}{1}.nc'.format(day.year, str(day.month).zfill(2))),
+            'sfcWind', 'daily', day, geometry_shp).loc[:, 'sfcWind']
+
+        # Extracting denominators already calculated for all the emission types
+        meteo['D_grassing'] = IoNetcdf(self.comm).get_data_from_netcdf(
+            os.path.join(self.paths['denominator_dir'], 'grassing_{0}.nc'.format(day.year)),
+            'FD', 'yearly', day, geometry_shp).loc[:, 'FD']
+        meteo['D_housing_closed'] = IoNetcdf(self.comm).get_data_from_netcdf(
+            os.path.join(self.paths['denominator_dir'], 'housing_closed_{0}.nc'.format(day.year)),
+            'FD', 'yearly', day, geometry_shp).loc[:, 'FD']
+        meteo['D_housing_open'] = IoNetcdf(self.comm).get_data_from_netcdf(
+            os.path.join(self.paths['denominator_dir'], 'housing_open_{0}.nc'.format(day.year)),
+            'FD', 'yearly', day, geometry_shp).loc[:, 'FD']
+        meteo['D_storage'] = IoNetcdf(self.comm).get_data_from_netcdf(
+            os.path.join(self.paths['denominator_dir'], 'storage_{0}.nc'.format(day.year)),
+            'FD', 'yearly', day, geometry_shp).loc[:, 'FD']
+
+        # ===== Daily Factor for housing open =====
+        meteo.loc[meteo['tas'] < 1, 'FD_housing_open'] = ((4.0 ** 0.89) * (0.228 ** 0.26)) / (meteo['D_housing_open'])
+        meteo.loc[meteo['tas'] >= 1, 'FD_housing_open'] = (((meteo['tas'] + 3.0) ** 0.89) * (0.228 ** 0.26)) / (
+            meteo['D_housing_open'])
+
+        # ===== Daily Factor for housing closed =====
+        meteo.loc[meteo['tas'] < 0, 'FD_housing_closed'] = \
+            ((np.maximum([0], 18.0 + meteo['tas'].multiply(0.5)) ** 0.89) * (0.2 ** 0.26)) / (meteo['D_housing_closed'])
+        meteo.loc[(meteo['tas'] >= 0) & (meteo['tas'] <= 12.5), 'FD_housing_closed'] = \
+            ((18.0 ** 0.89) * ((0.2 + meteo['tas'].multiply((0.38 - 0.2) / 12.5)) ** 0.26)) / (
+                meteo['D_housing_closed'])
+        meteo.loc[meteo['tas'] > 12.5, 'FD_housing_closed'] = \
+            (((18.0 + (meteo['tas'] - 12.5).multiply(0.77)) ** 0.89) * (0.38 ** 0.26)) / (meteo['D_housing_closed'])
+
+        # ===== Daily Factor for storage =====
+        meteo.loc[meteo['tas'] < 1, 'FD_storage'] = ((1 ** 0.89) * (meteo['sfcWind'] ** 0.26)) / (meteo['D_storage'])
+        meteo.loc[meteo['tas'] >= 1, 'FD_storage'] = \
+            ((meteo['tas'] ** 0.89) * (meteo['sfcWind'] ** 0.26)) / (meteo['D_storage'])
+
+        # ===== Daily Factor for grassing =====
+        meteo.loc[:, 'FD_grassing'] = \
+            (np.exp(meteo['tas'].multiply(0.0223)) * np.exp(meteo['sfcWind'].multiply(0.0419))) / (meteo['D_grassing'])
+        meteo.loc[:, 'FD_grassing'] = \
+            meteo.loc[:, 'FD_grassing'].multiply((1 / (SIGMA * math.sqrt(2 * math.pi))) * math.exp(
+                (float(int(day.strftime('%j')) - TAU) ** 2) / (-2 * (SIGMA ** 2))))
+
+        self.logger.write_time_log('LivestockSector', 'get_daily_factors', timeit.default_timer() - spent_time)
+
+        return
meteo.loc[:, ['REC', 'FD_housing_open', 'FD_housing_closed', 'FD_storage', 'FD_grassing', 'geometry']] + + def get_nh3_ef(self): + """ + Calculate the emission factor for yarding, grazing, housing and storage emissions + + :return: Dataframe with the Emission factors as columns and animal subtypes as 'Code' + """ + spent_time = timeit.default_timer() + ef_df = pd.read_csv(os.path.join(self.ef_dir, 'nh3.csv')) + + new_df = ef_df.loc[:, ['Code']] + + new_df['EF_yarding'] = ef_df['Nex'] * ef_df['Xtan'] * ef_df['Xyards'] * ef_df['EF_yard'] + new_df['EF_grazing'] = ef_df['Nex'] * ef_df['Xtan'] * ef_df['Xgraz'] * ef_df['EF_graz'] + new_df['EF_housing'] = \ + ef_df['Nex'] * ef_df['Xtan'] * ef_df['Xhousing'] * \ + ((ef_df['EF_hous_slurry'] * ef_df['Xslurry']) + (ef_df['EF_hous_solid'] * ef_df['Xsolid'])) + + new_df['Estorage_sd_l'] = \ + ((((ef_df['Nex'] * ef_df['Xhousing'] * ef_df['Xtan'] * ef_df['Xsolid']) - + (ef_df['Nex'] * ef_df['Xhousing'] * ef_df['Xtan'] * ef_df['Xsolid'] * ef_df['EF_hous_solid'])) * + (1 - ef_df['f_imm'])) * ef_df['x_store_FYM']) * ef_df['EF_storage_solid_NH3'] + new_df['Mstorage_slurry_TAN'] = \ + (((ef_df['Nex'] * ef_df['Xhousing'] * ef_df['Xtan'] * ef_df['Xslurry']) - + (ef_df['Nex'] * ef_df['Xhousing'] * ef_df['Xtan'] * ef_df['Xslurry'] * ef_df['EF_hous_slurry'])) + + ((ef_df['Nex'] * ef_df['Xyards'] * ef_df['Xtan']) - + (ef_df['Nex'] * ef_df['Xyards'] * ef_df['Xtan'] * ef_df['EF_yard']))) * ef_df['x_store_slurry'] + new_df['Mstorage_slurry_N'] = \ + (((ef_df['Nex'] * ef_df['Xhousing'] * ef_df['Xslurry']) - + (ef_df['Nex'] * ef_df['Xhousing'] * ef_df['Xslurry'] * ef_df['Xtan'] * ef_df['EF_hous_slurry'])) + + ((ef_df['Nex'] * ef_df['Xyards']) - (ef_df['Nex'] * ef_df['Xyards'] * ef_df['Xtan'] * + ef_df['EF_yard']))) * ef_df['x_store_slurry'] + + new_df['Estorage_sl_l'] = \ + (new_df['Mstorage_slurry_TAN'] + ((new_df['Mstorage_slurry_N'] - new_df['Mstorage_slurry_TAN']) * + ef_df['f_min'])) * ef_df['EF_storage_slurry_NH3'] + new_df.drop(['Mstorage_slurry_TAN', 'Mstorage_slurry_N'], axis=1, inplace=True) + + new_df['EF_storage'] = new_df['Estorage_sd_l'] + new_df['Estorage_sl_l'] + new_df.drop(['Estorage_sd_l', 'Estorage_sl_l'], axis=1, inplace=True) + self.logger.write_time_log('LivestockSector', 'get_nh3_ef', timeit.default_timer() - spent_time) + + return new_df + + def get_nox_no_ef(self): + """ + Calculate the emission factor for storage emissions + + :return: Dataframe with the Emission factors as columns and animal subtypes as 'Code' + """ + spent_time = timeit.default_timer() + ef_df = pd.read_csv(os.path.join(self.ef_dir, 'nox_no.csv')) + + new_df = ef_df.loc[:, ['Code']] + + new_df['Estorage_sd_l'] = \ + ((((ef_df['Nex'] * ef_df['Xhousing'] * ef_df['Xtan'] * ef_df['Xsolid']) - + (ef_df['Nex'] * ef_df['Xhousing'] * ef_df['Xtan'] * ef_df['Xsolid'] * ef_df['EF_hous_solid'])) * + (1 - ef_df['f_imm'])) * ef_df['x_store_FYM']) * ef_df['EF_storage_solid_NO'] + new_df['Mstorage_slurry_TAN'] = \ + (((ef_df['Nex'] * ef_df['Xhousing'] * ef_df['Xtan'] * ef_df['Xslurry']) - + (ef_df['Nex'] * ef_df['Xhousing'] * ef_df['Xtan'] * ef_df['Xslurry'] * ef_df['EF_hous_slurry'])) + + ((ef_df['Nex'] * ef_df['Xyards'] * ef_df['Xtan']) - + (ef_df['Nex'] * ef_df['Xyards'] * ef_df['Xtan'] * ef_df['EF_yard']))) * ef_df['x_store_slurry'] + new_df['Mstorage_slurry_N'] = \ + (((ef_df['Nex'] * ef_df['Xhousing'] * ef_df['Xslurry']) - + (ef_df['Nex'] * ef_df['Xhousing'] * ef_df['Xslurry'] * ef_df['Xtan'] * ef_df['EF_hous_slurry'])) + + ((ef_df['Nex'] * ef_df['Xyards']) - + (ef_df['Nex'] * 
ef_df['Xyards'] * ef_df['Xtan'] * ef_df['EF_yard']))) * ef_df['x_store_slurry'] + + new_df['Estorage_sl_l'] = \ + (new_df['Mstorage_slurry_TAN'] + ((new_df['Mstorage_slurry_N'] - new_df['Mstorage_slurry_TAN']) * + ef_df['f_min'])) * ef_df['EF_storage_slurry_NO'] + new_df.drop(['Mstorage_slurry_TAN', 'Mstorage_slurry_N'], axis=1, inplace=True) + + new_df['EF_storage'] = new_df['Estorage_sd_l'] + new_df['Estorage_sl_l'] + new_df.drop(['Estorage_sd_l', 'Estorage_sl_l'], axis=1, inplace=True) + self.logger.write_time_log('LivestockSector', 'get_nox_no_ef', timeit.default_timer() - spent_time) + + return new_df + + def add_daily_factors_to_animal_distribution(self, animals_df, daily_factors): + """ + Add to the animal distribution the daily factors. + + :param animals_df: GeoDataframe with the amount of each animal subtype by destiny cell (FID) + Columns: + 'FID', 'cattle_01', 'cattle_02', 'cattle_03' 'cattle_04', 'cattle_05', 'cattle_06', 'cattle_07', + 'cattle_08', 'cattle_09', 'cattle_10', 'cattle_11', 'chicken_01', 'chicken_02', 'goats_01', 'goats_02', + 'goats_03', goats_04', 'goats_05', 'goats_06', 'pigs_01', 'pigs_02', 'pigs_03', 'pigs_04', 'pigs_05', + 'pigs_06', 'pigs_07', 'pigs_08', 'pigs_09', 'pigs_10', 'timezone', 'geometry' + :type animals_df: geopandas.GeoDataframe + + :param daily_factors: GeoDataframe with the daily factors. + Columns: + 'REC', 'geometry', 'FD_housing_open', 'FD_housing_closed, 'FD_storage', 'FD_grassing' + :type daily_factors: geopandas.GeoDataframe + + :return: Animal distribution with the daily factors. + :rtype: geopandas.GeoDataframe + """ + spent_time = timeit.default_timer() + animals_df = animals_df.to_crs({'init': 'epsg:4326'}) + animals_df['centroid'] = animals_df.centroid + + animals_df['REC'] = animals_df.apply(self.nearest, geom_union=daily_factors.unary_union, df1=animals_df, + df2=daily_factors, geom1_col='centroid', src_column='REC', axis=1) + + animals_df = pd.merge(animals_df, daily_factors, how='left', on='REC') + + animals_df.drop(columns=['centroid', 'REC', 'geometry_y'], axis=1, inplace=True) + animals_df.rename(columns={'geometry_x': 'geometry'}, inplace=True) + self.logger.write_time_log('LivestockSector', 'add_daily_factors_to_animal_distribution', + timeit.default_timer() - spent_time) + + return animals_df + + def calculate_day_emissions(self, animals_df, day): + """ + Calculate the emissions, already speciated, corresponding to the given day. + + :param animals_df: GeoDataframe with the amount of each animal subtype by destiny cell (FID) + Columns: + 'FID', 'cattle_01', 'cattle_02', 'cattle_03' 'cattle_04', 'cattle_05', 'cattle_06', 'cattle_07', + 'cattle_08', 'cattle_09', 'cattle_10', 'cattle_11', 'chicken_01', 'chicken_02', 'goats_01', 'goats_02', + 'goats_03', goats_04', 'goats_05', 'goats_06', 'pigs_01', 'pigs_02', 'pigs_03', 'pigs_04', 'pigs_05', + 'pigs_06', 'pigs_07', 'pigs_08', 'pigs_09', 'pigs_10', 'timezone', 'geometry' + :type animals_df: geopandas.GeoDataframe + + :param day: Date of the day to generate. + :type day: datetime.date + + :return: GeoDataframe with the daily emissions by destiny cell. 
+        :rtype: geopandas.GeoDataframe
+        """
+        spent_time = timeit.default_timer()
+        daily_factors = self.get_daily_factors(animals_df.loc[:, ['FID', 'geometry']], day)
+        animals_df = self.add_daily_factors_to_animal_distribution(animals_df, daily_factors)
+
+        out_df = animals_df.loc[:, ['FID', 'timezone', 'geometry']]
+
+        # ===== NH3 =====
+        if 'nh3' in [x.lower() for x in self.source_pollutants]:
+            # get_list out_pollutants from speciation map -> NH3
+            out_pollutants = self.get_output_pollutants('nh3')
+            for out_p in out_pollutants:
+                self.logger.write_log('\t\t\tCalculating {0} emissions'.format(out_p), message_level=3)
+                out_df[out_p] = 0
+                if out_p not in self.output_pollutants:
+                    self.output_pollutants.append(out_p)
+                for i, animal in self.get_nh3_ef().iterrows():
+                    # Iterating by animal subtype
+                    if animal.Code.startswith(tuple(self.animal_list)):
+                        # Housing emissions
+                        if animal.Code.startswith(('cattle', 'sheep', 'goats')):
+                            # Housing open emissions
+                            out_df.loc[:, out_p] += \
+                                (animals_df[animal['Code']] * animals_df['FD_housing_open']).multiply(
+                                    animal['EF_housing'])
+                        elif animal.Code.startswith(('chicken', 'pigs')):
+                            # Housing closed emissions
+                            out_df.loc[:, out_p] += \
+                                (animals_df[animal['Code']] * animals_df['FD_housing_closed']).multiply(
+                                    animal['EF_housing'])
+                        else:
+                            raise KeyError('Animal {0} not found on the nh3 emission factors file.'.format(animal.Code))
+                        # Yarding emissions
+                        out_df.loc[:, out_p] += \
+                            (animals_df[animal['Code']] * animals_df['FD_storage']).multiply(animal['EF_yarding'])
+                        # Grassing emissions
+                        out_df.loc[:, out_p] += \
+                            (animals_df[animal['Code']] * animals_df['FD_grassing']).multiply(animal['EF_grazing'])
+                        # Storage emissions
+                        out_df.loc[:, out_p] += \
+                            (animals_df[animal['Code']] * animals_df['FD_storage']).multiply(animal['EF_storage'])
+
+                # From kg NH3-N to mol NH3
+                out_df.loc[:, out_p] = out_df.loc[:, out_p].multiply(
+                    (17. / 14.) * 1000. * (1. / self.molecular_weights['nh3']))
+
+        # ===== NMVOC =====
+        if 'nmvoc' in [x.lower() for x in self.source_pollutants]:
+            # get_list out_pollutants from speciation map -> PAR, OLE, TOL ...
(15 species) + out_pollutants = self.get_output_pollutants('nmvoc') + for out_p in out_pollutants: + self.logger.write_log('\t\t\tCalculating {0} emissions'.format(out_p), message_level=3) + out_df[out_p] = 0 + if out_p not in self.output_pollutants: + self.output_pollutants.append(out_p) + for i, animal in pd.read_csv(os.path.join(self.ef_dir, 'nmvoc.csv')).iterrows(): + # Iterating by animal subtype + if animal.Code.startswith(tuple(self.animal_list)): + # Housing emissions + if animal.Code.startswith(('cattle',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_nmvoc'] * self.speciation_profile.loc['cattle', out_p]) + elif animal.Code.startswith(('pigs',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_nmvoc'] * self.speciation_profile.loc['pigs', out_p]) + elif animal.Code.startswith(('chicken',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_nmvoc'] * self.speciation_profile.loc['chicken', out_p]) + elif animal.Code.startswith(('sheep',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_nmvoc'] * self.speciation_profile.loc['sheep', out_p]) + elif animal.Code.startswith(('goats',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_nmvoc'] * self.speciation_profile.loc['goats', out_p]) + + out_df.loc[:, out_p] = out_df.loc[:, out_p].multiply(1000. * (1. / self.molecular_weights['nmvoc'])) + + # ===== PM10 ===== + if 'pm10' in [x.lower() for x in self.source_pollutants]: + out_pollutants = self.get_output_pollutants('pm10') + for out_p in out_pollutants: + self.logger.write_log('\t\t\tCalculating {0} emissions'.format(out_p), message_level=3) + out_df[out_p] = 0 + if out_p not in self.output_pollutants: + self.output_pollutants.append(out_p) + for i, animal in pd.read_csv(os.path.join(self.ef_dir, 'pm.csv')).iterrows(): + # Iterating by animal subtype + if animal.Code.startswith(tuple(self.animal_list)): + if animal.Code.startswith(('cattle',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm10'] * self.speciation_profile.loc['cattle', out_p]) + elif animal.Code.startswith(('pigs',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm10'] * self.speciation_profile.loc['pigs', out_p]) + elif animal.Code.startswith(('chicken',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm10'] * self.speciation_profile.loc['chicken', out_p]) + elif animal.Code.startswith(('sheep',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm10'] * self.speciation_profile.loc['sheep', out_p]) + elif animal.Code.startswith(('goats',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm10'] * self.speciation_profile.loc['goats', out_p]) + out_df.loc[:, out_p] = out_df.loc[:, out_p].multiply(1000. * (1. 
/ self.molecular_weights['pm10'])) + + # Preparing PM10 for PMC + if 'pmc' in [x.lower() for x in self.speciation_map.iterkeys()]: + out_df['aux_pm10'] = 0 + for i, animal in pd.read_csv(os.path.join(self.ef_dir, 'pm.csv')).iterrows(): + # Iterating by animal subtype + if animal.Code.startswith(tuple(self.animal_list)): + if animal.Code.startswith(('cattle',)): + out_df.loc[:, 'aux_pm10'] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm10']) + elif animal.Code.startswith(('pigs',)): + out_df.loc[:, 'aux_pm10'] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm10']) + elif animal.Code.startswith(('chicken',)): + out_df.loc[:, 'aux_pm10'] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm10']) + elif animal.Code.startswith(('sheep',)): + out_df.loc[:, 'aux_pm10'] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm10']) + elif animal.Code.startswith(('goats',)): + out_df.loc[:, 'aux_pm10'] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm10']) + out_df.loc[:, 'aux_pm10'] = out_df.loc[:, 'aux_pm10'].multiply( + 1000. * (1. / self.molecular_weights['pm10'])) + + # ===== PM2.5 ===== + if 'pm25' in [x.lower() for x in self.source_pollutants]: + out_pollutants = self.get_output_pollutants('pm25') + for out_p in out_pollutants: + self.logger.write_log('\t\t\tCalculating {0} emissions'.format(out_p), message_level=3) + out_df[out_p] = 0 + if out_p not in self.output_pollutants: + self.output_pollutants.append(out_p) + for i, animal in pd.read_csv(os.path.join(self.ef_dir, 'pm.csv')).iterrows(): + if animal.Code.startswith(tuple(self.animal_list)): + # Iterating by animal subtype + if animal.Code.startswith(('cattle',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm25'] * self.speciation_profile.loc['cattle', out_p]) + elif animal.Code.startswith(('pigs',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm25'] * self.speciation_profile.loc['pigs', out_p]) + elif animal.Code.startswith(('chicken',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm25'] * self.speciation_profile.loc['chicken', out_p]) + elif animal.Code.startswith(('sheep',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm25'] * self.speciation_profile.loc['sheep', out_p]) + elif animal.Code.startswith(('goats',)): + out_df.loc[:, out_p] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm25'] * self.speciation_profile.loc['goats', out_p]) + out_df.loc[:, out_p] = out_df.loc[:, out_p].multiply(1000. * (1. 
/ self.molecular_weights['pm25'])) + + # Preparing PM2.5 for PMC + if 'pmc' in [x.lower() for x in self.speciation_map.iterkeys()]: + out_df['aux_pm25'] = 0 + for i, animal in pd.read_csv(os.path.join(self.ef_dir, 'pm.csv')).iterrows(): + if animal.Code.startswith(tuple(self.animal_list)): + # Iterating by animal subtype + if animal.Code.startswith(('cattle',)): + out_df.loc[:, 'aux_pm25'] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm25']) + elif animal.Code.startswith(('pigs',)): + out_df.loc[:, 'aux_pm25'] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm25']) + elif animal.Code.startswith(('chicken',)): + out_df.loc[:, 'aux_pm25'] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm25']) + elif animal.Code.startswith(('sheep',)): + out_df.loc[:, 'aux_pm25'] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm25']) + elif animal.Code.startswith(('goats',)): + out_df.loc[:, 'aux_pm25'] += animals_df[animal['Code']].multiply( + animal['Xhousing'] * animal['EF_pm25']) + out_df.loc[:, 'aux_pm25'] = out_df.loc[:, 'aux_pm25'].multiply( + 1000. * (1. / self.molecular_weights['pm25'])) + + # ===== NOX_NO ===== + if 'nox_no' in [x.lower() for x in self.source_pollutants]: + out_pollutants = self.get_output_pollutants('nox_no') + for out_p in out_pollutants: + self.logger.write_log('\t\t\tCalculating {0} emissions'.format(out_p), message_level=3) + out_df[out_p] = 0 + if out_p not in self.output_pollutants: + self.output_pollutants.append(out_p) + + for i, animal in self.get_nox_no_ef().iterrows(): + # Iterating by animal subtype + if animal.Code.startswith(tuple(self.animal_list)): + # Storage emissions + out_df.loc[:, out_p] += (animals_df[animal['Code']] * animals_df['FD_storage']).multiply( + animal['EF_storage']) + + # From kg NOX-N to mol NO + out_df.loc[:, out_p] = out_df.loc[:, out_p].multiply( + (30. / 14.) * 1000. * (1. / self.molecular_weights['nox_no'])) + + # ===== PMC ===== + if 'pmc' in [x.lower() for x in self.speciation_map.iterkeys()]: + pmc_name = 'PMC' + self.logger.write_log('\t\t\tCalculating {0} emissions'.format(pmc_name), message_level=3) + if all(x in [x.lower() for x in self.source_pollutants] for x in ['pm10', 'pm25']): + if pmc_name not in self.output_pollutants: + self.output_pollutants.append(pmc_name) + out_df[pmc_name] = out_df['aux_pm10'] - out_df['aux_pm25'] + out_df.drop(columns=['aux_pm10', 'aux_pm25'], axis=1, inplace=True) + else: + warn("WARNING: '{0}' cannot be calculated because 'pm10' or/and 'pm25' ".format(pmc_name) + + "are not in the livestock_source_pollutants list") + + not_pollutants = [poll for poll in self.source_pollutants + if poll not in ['nh3', 'nox_no', 'nh3', 'nmvoc', 'pm10', 'pm25']] + if len(not_pollutants) > 0: + if self.comm.Get_rank() == 0: + warn('The pollutants {0} cannot be calculated on the Livestock sector'.format(not_pollutants)) + self.logger.write_time_log('LivestockSector', 'calculate_day_emissions', timeit.default_timer() - spent_time) + + return out_df + + def calculate_daily_emissions_dict(self, animals_df): + """ + Calculate the daily emissions setting it in a dictionary with the day as key. 
+ + :param animals_df: GeoDataframe with the amount of each animal subtype by destiny cell (FID) + Columns: + 'FID', 'cattle_01', 'cattle_02', 'cattle_03' 'cattle_04', 'cattle_05', 'cattle_06', 'cattle_07', + 'cattle_08', 'cattle_09', 'cattle_10', 'cattle_11', 'chicken_01', 'chicken_02', 'goats_01', 'goats_02', + 'goats_03', goats_04', 'goats_05', 'goats_06', 'pigs_01', 'pigs_02', 'pigs_03', 'pigs_04', 'pigs_05', + 'pigs_06', 'pigs_07', 'pigs_08', 'pigs_09', 'pigs_10', 'timezone', 'geometry' + :type animals_df: geopandas.GeoDataframe + + :return: Dictionary with the day as key (same key as self.day_dict) and the daily emissions as value. + :rtype: dict + """ + spent_time = timeit.default_timer() + daily_emissions = {} + for day in self.day_dict.keys(): + daily_emissions[day] = self.calculate_day_emissions(animals_df, day) + self.logger.write_time_log('LivestockSector', 'calculate_daily_emissions_dict', + timeit.default_timer() - spent_time) + + return daily_emissions + + def add_dates(self, df_by_day): + """ + Expand each daily dataframe into a single dataframe with all the time steps. + + :param df_by_day: Dictionary with the daily emissions for each day. + :type df_by_day: dict + + :return: GeoDataframe with all the time steps (each time step have the daily emission) + :rtype: geopandas.GeoDataframe + """ + spent_time = timeit.default_timer() + df_list = [] + for tstep, date in enumerate(self.date_array): + df_aux = df_by_day[date.date()].copy() + df_aux['date'] = pd.to_datetime(date, utc=True) + df_aux['date_utc'] = pd.to_datetime(date, utc=True) + df_aux['tstep'] = tstep + df_list.append(df_aux) + dataframe_by_day = pd.concat(df_list, ignore_index=True) + + dataframe_by_day = self.to_timezone(dataframe_by_day) + self.logger.write_time_log('LivestockSector', 'add_dates', timeit.default_timer() - spent_time) + + return dataframe_by_day + + def calculate_hourly_distribution(self, dict_by_day): + """ + Calculate the hourly distribution for all the emissions. + + The NH3 & NOX_NO emissions have to be also monthly and weekly distributed. + + :param dict_by_day: Dictionary with the day as key (same key as self.day_dict) and the daily emissions as value. + :type dict_by_day: dict + + :return: GeoDataframe with the hourly distribution. 
+ :rtype: geopandas.GeoDataframe + """ + spent_time = timeit.default_timer() + + def distribute_weekly(df): + import datetime + date_np = df.head(1)['date'].values[0] + date = datetime.datetime.utcfromtimestamp(date_np.astype(int) * 1e-9) + profile = self.calculate_rebalanced_weekly_profile(self.weekly_profiles.loc[in_p, :].to_dict(), date) + + df[out_p] = df[out_p].multiply(profile[df.name[1]]) + return df.loc[:, [out_p]] + + # Create unique dataframe + distribution = self.add_dates(dict_by_day) + + distribution['hour'] = distribution['date'].dt.hour + for out_p in self.output_pollutants: + self.logger.write_log('\t\t\tDistributing {0} emissions to hourly resolution'.format(out_p), + message_level=3) + if out_p.lower() == 'pmc': + in_p = 'pmc' + else: + in_p = self.speciation_map[out_p] + + # NH3 & NOX_NO emissions have to be also monthly and weekly distributed + if in_p.lower() not in ['nh3', 'nox_no']: + # Monthly distribution + distribution['month'] = distribution['date'].dt.month + distribution[out_p] = distribution.groupby('month')[out_p].apply(lambda x: x.multiply( + self.monthly_profiles.loc[in_p, x.name])) + + # Weekday distribution + distribution['weekday'] = distribution['date'].dt.weekday + + distribution[out_p] = distribution.groupby(['month', 'weekday'])['date', out_p].apply(distribute_weekly) + + distribution.drop(columns=['month', 'weekday'], axis=1, inplace=True) + # Hourly distribution + distribution[out_p] = distribution.groupby('hour')[out_p].apply(lambda x: x.multiply( + self.hourly_profiles.loc[in_p, x.name])) + + distribution['date'] = distribution['date_utc'] + distribution.drop(columns=['hour', 'date_utc'], axis=1, inplace=True) + self.logger.write_time_log('LivestockSector', 'calculate_hourly_distribution', + timeit.default_timer() - spent_time) + + return distribution + + def calculate_emissions(self): + """ + Calculate the livestock emissions hourly distributed. + + :return: GeoDataframe with all the emissions. 
+        :rtype: geopandas.GeoDataframe
+        """
+        spent_time = timeit.default_timer()
+        self.logger.write_log('\tCalculating emissions')
+
+        self.logger.write_log('\t\tCalculating Daily emissions', message_level=2)
+        df_by_day = self.calculate_daily_emissions_dict(self.animals_df)
+        self.logger.write_log('\t\tCalculating hourly emissions', message_level=2)
+        animals_df = self.calculate_hourly_distribution(df_by_day)
+
+        animals_df.drop(columns=['geometry'], inplace=True)
+        animals_df['layer'] = 0
+
+        animals_df = animals_df.groupby(['FID', 'layer', 'tstep']).sum()
+        self.logger.write_log('\t\tLivestock emissions calculated', message_level=2)
+        self.logger.write_time_log('LivestockSector', 'calculate_emissions', timeit.default_timer() - spent_time)
+
+        return animals_df
diff --git a/hermesv3_bu/sectors/point_source_sector.py b/hermesv3_bu/sectors/point_source_sector.py
new file mode 100755
index 0000000000000000000000000000000000000000..d313d068f532ec6b7113f1864c3e3f1b9a40144b
--- /dev/null
+++ b/hermesv3_bu/sectors/point_source_sector.py
@@ -0,0 +1,794 @@
+#!/usr/bin/env python
+
+import os
+import timeit
+import numpy as np
+import pandas as pd
+import geopandas as gpd
+from warnings import warn
+from hermesv3_bu.sectors.sector import Sector
+from hermesv3_bu.io_server.io_shapefile import IoShapefile
+# from hermesv3_bu.io_server.io_netcdf import IoNetcdf
+from hermesv3_bu.logger.log import Log
+
+INTERPOLATION_TYPE = 'linear'
+# GRAVITY m/s-2
+GRAVITY = 9.81
+# CP J/kg*K
+CP = 1005
+
+
+class PointSourceSector(Sector):
+    """
+    Class to calculate the Point Source emissions.
+
+    :param grid_shp: Grid of the destination domain
+    :type grid_shp: Grid
+
+    :param catalog_path: Path to the file that contains all the information for each point source.
+    :type catalog_path: str
+
+    :param monthly_profiles_path: Path to the file that contains the monthly profiles.
+    :type monthly_profiles_path: str
+
+    :param weekly_profiles_path: Path to the file that contains the weekly profiles.
+    :type weekly_profiles_path: str
+
+    :param hourly_profiles_path: Path to the file that contains the hourly profile.
+    :type hourly_profiles_path: str
+
+    :param speciation_map_path: Path to the file that contains the speciation map.
+    :type speciation_map_path: str
+
+    :param speciation_profiles_path: Path to the file that contains the speciation profiles.
+    :type speciation_profiles_path: str
+
+    :param sector_list: List of sectors (SNAPS) to take into account.
01, 03, 04, 09 + :type sector_list: list + """ + def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + catalog_path, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, + speciation_map_path, speciation_profiles_path, sector_list, measured_emission_path, + molecular_weights_path, plume_rise=False, plume_rise_pahts=None): + spent_time = timeit.default_timer() + + super(PointSourceSector, self).__init__( + comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path) + + self.plume_rise = plume_rise + self.catalog = self.read_catalog(catalog_path, sector_list) + + self.catalog_measured = self.read_catalog_for_measured_emissions(catalog_path, sector_list) + self.measured_path = measured_emission_path + self.plume_rise_pahts = plume_rise_pahts + + self.logger.write_time_log('PointSourceSector', '__init__', timeit.default_timer() - spent_time) + + def read_catalog(self, catalog_path, sector_list): + """ + Read the catalog + + :param catalog_path: path to the catalog + :type catalog_path: str + + :param sector_list: List of sectors to take into account + :type sector_list: list + + :return: catalog + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + if self.comm.Get_rank() == 0: + if self.plume_rise: + columns = {"Code": np.str, "Cons": np.bool, "SNAP": np.str, "Lon": np.float64, + "Lat": np.float64, "Height": np.float64, "Diameter": np.float64, + "Speed": np.float64, "Temp": np.float64, "AF": np.float64, + "P_month": np.str, "P_week": np.str, "P_hour": np.str, "P_spec": np.str} + else: + columns = {"Code": np.str, "Cons": np.bool, "SNAP": np.str, "Lon": np.float64, + "Lat": np.float64, "Height": np.float64, "AF": np.float64, + "P_month": np.str, "P_week": np.str, "P_hour": np.str, "P_spec": np.str} + for pollutant in self.source_pollutants: + # EF in Kg / Activity factor + columns['EF_{0}'.format(pollutant)] = np.float64 + + catalog_df = pd.read_csv(catalog_path, usecols=columns.keys(), dtype=columns) + + # Filtering + catalog_df = catalog_df.loc[catalog_df['Cons'] == 1, :] + catalog_df.drop('Cons', axis=1, inplace=True) + + # Filtering + catalog_df = catalog_df.loc[catalog_df['AF'] != -1, :] + + if sector_list is not None: + catalog_df = catalog_df.loc[catalog_df['SNAP'].str[:2].isin(sector_list)] + catalog_df.drop('SNAP', axis=1, inplace=True) + catalog_df.sort_values('Lat', inplace=True) + + catalog_df = self.to_geodataframe(catalog_df) + + catalog_df = gpd.sjoin(catalog_df, self.clip.shapefile.to_crs(catalog_df.crs), how='inner') + catalog_df.drop(columns=['index_right'], inplace=True) + + else: + catalog_df = None + self.comm.Barrier() + catalog_df = IoShapefile(self.comm).split_shapefile(catalog_df) + self.logger.write_time_log('PointSourceSector', 'read_catalog', timeit.default_timer() - spent_time) + return catalog_df + + def read_catalog_for_measured_emissions(self, catalog_path, sector_list): + """ + Read the catalog + + :param catalog_path: path to the catalog + :type catalog_path: str + + :param sector_list: List of sectors to take into account + :type sector_list: list + + :return: catalog + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + if self.plume_rise: + columns = {"Code": np.str, "Cons": np.bool, "SNAP": np.str, "Lon": np.float64, "Lat": np.float64, + "Height": np.float64, "Diameter": np.float64, 
"Speed": np.float64, "Temp": np.float64, + "AF": np.float64, "P_spec": np.str} + else: + columns = {"Code": np.str, "Cons": np.bool, "SNAP": np.str, "Lon": np.float64, "Lat": np.float64, + "Height": np.float64, "AF": np.float64, "P_spec": np.str} + # for pollutant in self.pollutant_list: + # columns['EF_{0}'.format(pollutant)] = settings.precision + + catalog_df = pd.read_csv(catalog_path, usecols=columns.keys(), dtype=columns) + + # Filtering + catalog_df = catalog_df.loc[catalog_df['Cons'] == 1, :] + catalog_df.drop('Cons', axis=1, inplace=True) + + # Filtering + catalog_df = catalog_df.loc[catalog_df['AF'] == -1, :] + catalog_df.drop('AF', axis=1, inplace=True) + + if sector_list is not None: + catalog_df = catalog_df.loc[catalog_df['SNAP'].str[:2].isin(sector_list)] + catalog_df.drop('SNAP', axis=1, inplace=True) + + self.logger.write_time_log('PointSourceSector', 'read_catalog_for_measured_emissions', + timeit.default_timer() - spent_time) + return catalog_df + + def to_geodataframe(self, catalog): + """ + Convert a simple DataFrame with Lat, Lon columns into a GeoDataFrame as a shape + + :param catalog: DataFrame with all the information of each point source. + :type catalog: DataFrame + + :return: GeoDataFrame with all the information of each point source. + :rtype: GeoDataFrame + """ + from shapely.geometry import Point + spent_time = timeit.default_timer() + + geometry = [Point(xy) for xy in zip(catalog.Lon, catalog.Lat)] + catalog.drop(['Lon', 'Lat'], axis=1, inplace=True) + crs = {'init': 'epsg:4326'} + catalog = gpd.GeoDataFrame(catalog, crs=crs, geometry=geometry) + self.logger.write_time_log('PointSourceSector', 'to_geodataframe', timeit.default_timer() - spent_time) + return catalog + + def get_yearly_emissions(self, catalog): + """ + Calculate yearly emissions. + + :param catalog: Catalog with the activity factor (AF) column and all the emission factor column for each + pollutant. + :type catalog: DataFrame + + :return: Catalog with yearly emissions of each point source for all the pollutants (as column names). + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + self.logger.write_log('\tCalculating yearly emissions', message_level=2) + for pollutant in self.source_pollutants: + catalog.rename(columns={u'EF_{0}'.format(pollutant): pollutant}, inplace=True) + catalog[pollutant] = catalog[pollutant] * catalog['AF'] + + catalog.drop('AF', axis=1, inplace=True) + self.logger.write_time_log('PointSourceSector', 'get_yearly_emissions', timeit.default_timer() - spent_time) + return catalog + + def get_temporal_factors(self, catalog): + """ + Calculates the temporal factor for each point source and each time step. + + :param catalog: Catalog with the activity factor (AF) column and all the emission factor column for each + pollutant. + :type catalog: DataFrame + + :return: Catalog with yearly emissions of each point source for all the pollutants (as column names). 
+ :rtype: DataFrame + """ + spent_time = timeit.default_timer() + self.logger.write_log('\tCalculating hourly emissions', message_level=2) + + def get_mf(df): + month_factor = self.monthly_profiles.loc[df.name[1], df.name[0]] + + df['MF'] = month_factor + return df.loc[:, ['MF']] + + def get_wf(df): + weekly_profile = self.calculate_rebalanced_weekly_profile(self.weekly_profiles.loc[df.name[1], :].to_dict(), + df.name[0]) + df['WF'] = weekly_profile[df.name[0].weekday()] + return df.loc[:, ['WF']] + + def get_hf(df): + hourly_profile = self.hourly_profiles.loc[df.name[1], :].to_dict() + hour_factor = hourly_profile[df.name[0]] + + df['HF'] = hour_factor + return df.loc[:, ['HF']] + + catalog['month'] = catalog['date'].dt.month + catalog['weekday'] = catalog['date'].dt.weekday + catalog['hour'] = catalog['date'].dt.hour + catalog['date_as_date'] = catalog['date'].dt.date + + catalog['MF'] = catalog.groupby(['month', 'P_month']).apply(get_mf) + catalog['WF'] = catalog.groupby(['date_as_date', 'P_week']).apply(get_wf) + catalog['HF'] = catalog.groupby(['hour', 'P_hour']).apply(get_hf) + + catalog['temp_factor'] = catalog['MF'] * catalog['WF'] * catalog['HF'] + catalog.drop(['MF', 'WF', 'HF'], axis=1, inplace=True) + + catalog[self.source_pollutants] = catalog[self.source_pollutants].multiply(catalog['temp_factor'], axis=0) + + catalog.drop('temp_factor', axis=1, inplace=True) + + self.logger.write_time_log('PointSourceSector', 'get_temporal_factors', timeit.default_timer() - spent_time) + return catalog + + def calculate_hourly_emissions(self, catalog): + """ + Calculate the hourly emissions + + :param catalog: Catalog to calculate. + :type catalog: DataFrame + + :return: Catalog with the hourly emissions. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + catalog = self.get_yearly_emissions(catalog) + catalog = self.get_temporal_factors(catalog) + + catalog.set_index(['Code', 'tstep'], inplace=True) + self.logger.write_time_log('PointSourceSector', 'calculate_hourly_emissions', + timeit.default_timer() - spent_time) + return catalog + + @staticmethod + def get_meteo_xy(dataframe, netcdf_path): + def nearest(row, geom_union, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None): + """Finds the nearest point and return the corresponding value from specified column. 
+ https://automating-gis-processes.github.io/2017/lessons/L3/nearest-neighbour.html + """ + from shapely.ops import nearest_points + + # Find the geometry that is closest + nearest = df2[geom2_col] == nearest_points(row[geom1_col], geom_union)[1] + # Get the corresponding value from df2 (matching is based on the geometry) + value = df2[nearest][src_column].get_values()[0] + return value + from netCDF4 import Dataset + from shapely.geometry import Point + import numpy as np + import pandas as pd + import geopandas as gpd + + nc = Dataset(netcdf_path, mode='r') + lats = nc.variables['lat'][:] + lons = nc.variables['lon'][:] + x = np.array([np.arange(lats.shape[1])] * lats.shape[0]) + y = np.array([np.arange(lats.shape[0]).T] * lats.shape[1]).T + + nc_dataframe = pd.DataFrame.from_dict({'X': x.flatten(), 'Y': y.flatten()}) + nc_dataframe = gpd.GeoDataFrame(nc_dataframe, + geometry=[Point(xy) for xy in zip(lons.flatten(), lats.flatten())], + crs={'init': 'epsg:4326'}) + nc_dataframe['index'] = nc_dataframe.index + + union = nc_dataframe.unary_union + dataframe['meteo_index'] = dataframe.apply( + nearest, geom_union=union, df1=dataframe, df2=nc_dataframe, src_column='index', axis=1) + + dataframe['X'] = nc_dataframe.loc[dataframe['meteo_index'], 'X'].values + dataframe['Y'] = nc_dataframe.loc[dataframe['meteo_index'], 'Y'].values + + return dataframe[['X', 'Y']] + + def get_plumerise_meteo(self, catalog): + def get_sfc_value(dataframe, dir_path, var_name): + from netCDF4 import Dataset, num2date + nc_path = os.path.join(dir_path, + '{0}_{1}.nc'.format(var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + netcdf = Dataset(nc_path, mode='r') + # time_index + time = netcdf.variables['time'] + nc_times = [x.replace(minute=0, second=0, microsecond=0) for x in + num2date(time[:], time.units, time.calendar)] + time_index = nc_times.index(dataframe.name.to_pydatetime().replace(tzinfo=None)) + + var = netcdf.variables[var_name][time_index, 0, :] + netcdf.close() + dataframe[var_name] = var[dataframe['X'], dataframe['Y']] + + return dataframe[[var_name]] + + def get_layers(dataframe, dir_path, var_name): + from netCDF4 import Dataset, num2date + nc_path = os.path.join(dir_path, + '{0}_{1}.nc'.format(var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + netcdf = Dataset(nc_path, mode='r') + # time_index + time = netcdf.variables['time'] + nc_times = [x.replace(minute=0, second=0, microsecond=0) for x in + num2date(time[:], time.units, time.calendar)] + time_index = nc_times.index(dataframe.name.to_pydatetime().replace(tzinfo=None)) + + var = np.flipud(netcdf.variables[var_name][time_index, :, :, :]) + netcdf.close() + var = var[:, dataframe['X'], dataframe['Y']] + + pre_t_lay = 0 + lay_list = [] + # lay_list = ['l_sfc'] + # dataframe['l_sfc'] = 2 + for i, t_lay in enumerate(var): + t_lay += pre_t_lay + dataframe['l_{0}'.format(i)] = t_lay + pre_t_lay = t_lay + lay_list.append('l_{0}'.format(i)) + + dataframe['layers'] = dataframe[lay_list].values.tolist() + + return dataframe[['layers']] + + def get_temp_top(dataframe, dir_path, var_name): + from netCDF4 import Dataset, num2date + from scipy.interpolate import interp1d as interpolate + + nc_path = os.path.join(dir_path, + '{0}_{1}.nc'.format(var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + netcdf = Dataset(nc_path, mode='r') + # time_index + time = netcdf.variables['time'] + nc_times = [x.replace(minute=0, second=0, microsecond=0) for x in + num2date(time[:], time.units, time.calendar)] + time_index = 
nc_times.index(dataframe.name.to_pydatetime().replace(tzinfo=None)) + + var = np.flipud(netcdf.variables[var_name][time_index, :, :, :]) + netcdf.close() + var = var[:, dataframe['X'], dataframe['Y']] + + lay_list = ['temp_sfc'] + for i, t_lay in enumerate(var): + dataframe['t_{0}'.format(i)] = t_lay + lay_list.append('t_{0}'.format(i)) + + # Setting minimum height to 2 because is the lowest temperature layer height. + dataframe.loc[dataframe['Height'] < 2, 'Height'] = 2 + + dataframe['temp_list'] = dataframe[lay_list].values.tolist() + dataframe.drop(columns=lay_list, inplace=True) + # Interpolation + for ind, row in dataframe.iterrows(): + f_temp = interpolate([2] + row.get('layers'), row.get('temp_list'), kind=INTERPOLATION_TYPE) + dataframe.loc[ind, 'temp_top'] = f_temp(row.get('Height')) + + return dataframe[['temp_top']] + + def get_wind_speed_10m(dataframe, u_dir_path, v_dir_path, u_var_name, v_var_name): + from netCDF4 import Dataset, num2date + # === u10 === + u10_nc_path = os.path.join( + u_dir_path, '{0}_{1}.nc'.format(u_var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + u10_netcdf = Dataset(u10_nc_path, mode='r') + # time_index + time = u10_netcdf.variables['time'] + nc_times = [x.replace(minute=0, second=0, microsecond=0) for x in + num2date(time[:], time.units, time.calendar)] + time_index = nc_times.index(dataframe.name.to_pydatetime().replace(tzinfo=None)) + + var = u10_netcdf.variables[u_var_name][time_index, 0, :] + u10_netcdf.close() + dataframe['u10'] = var[dataframe['X'], dataframe['Y']] + + # === v10 === + v10_nc_path = os.path.join( + v_dir_path, '{0}_{1}.nc'.format(v_var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + v10_netcdf = Dataset(v10_nc_path, mode='r') + + var = v10_netcdf.variables[v_var_name][time_index, 0, :] + v10_netcdf.close() + dataframe['v10'] = var[dataframe['X'], dataframe['Y']] + + # === wind speed === + dataframe['wSpeed_10'] = np.linalg.norm(dataframe[['u10', 'v10']].values, axis=1) + + return dataframe[['wSpeed_10']] + + def get_wind_speed_top(dataframe, u_dir_path, v_dir_path, u_var_name, v_var_name): + from netCDF4 import Dataset, num2date + from scipy.interpolate import interp1d as interpolate + # === u10 === + u10_nc_path = os.path.join( + u_dir_path, '{0}_{1}.nc'.format(u_var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + u10_netcdf = Dataset(u10_nc_path, mode='r') + # time_index + time = u10_netcdf.variables['time'] + nc_times = [x.replace(minute=0, second=0, microsecond=0) for x in + num2date(time[:], time.units, time.calendar)] + time_index = nc_times.index(dataframe.name.to_pydatetime().replace(tzinfo=None)) + + var = np.flipud(u10_netcdf.variables[u_var_name][time_index, :, :, :]) + u10_netcdf.close() + var = var[:, dataframe['X'], dataframe['Y']] + + for i, t_lay in enumerate(var): + dataframe['u_{0}'.format(i)] = t_lay + + # === v10 === + v10_nc_path = os.path.join( + v_dir_path, '{0}_{1}.nc'.format(v_var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + v10_netcdf = Dataset(v10_nc_path, mode='r') + + var = np.flipud(v10_netcdf.variables[v_var_name][time_index, :, :, :]) + v10_netcdf.close() + var = var[:, dataframe['X'], dataframe['Y']] + + ws_lay_list = ['wSpeed_10'] + for i, t_lay in enumerate(var): + dataframe['v_{0}'.format(i)] = t_lay + ws_lay_list.append('ws_{0}'.format(i)) + dataframe['ws_{0}'.format(i)] = np.linalg.norm(dataframe[['u_{0}'.format(i), 'v_{0}'.format(i)]].values, + axis=1) + # Setting minimum height to 10 because is the lowest wind Speed layer. 
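+            # Illustrative sketch of the interpolation done below (hypothetical values, not from the
+            # original code): with layer tops [10] + layers = [10, 50, 120, 300] m and wind speeds
+            # ws_list = [3.2, 4.1, 5.0, 6.3] m/s, a stack Height of 80 m gets
+            # interpolate([10, 50, 120, 300], [3.2, 4.1, 5.0, 6.3], kind='linear')(80) ~= 4.5 m/s.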
+ dataframe.loc[dataframe['Height'] < 10, 'Height'] = 10 + dataframe['ws_list'] = dataframe[ws_lay_list].values.tolist() + + for ind, row in dataframe.iterrows(): + f_ws = interpolate([10] + row.get('layers'), row.get('ws_list'), kind=INTERPOLATION_TYPE) + dataframe.loc[ind, 'windSpeed_top'] = f_ws(row.get('Height')) + + return dataframe[['windSpeed_top']] + + # TODO Use IoNetCDF + spent_time = timeit.default_timer() + # Adding meteo X, Y array index to the catalog + meteo_xy = self.get_meteo_xy(catalog.groupby('Code').first(), os.path.join( + self.plume_rise_pahts['temperature_sfc_dir'], + 't2_{0}.nc'.format(self.date_array[0].replace(hour=0).strftime("%Y%m%d%H")))) + + catalog = catalog.merge(meteo_xy, left_index=True, right_index=True) + + # ===== 3D Meteo variables ===== + # Adding stc_temp + self.logger.write_log('\t\tGetting temperature from {0}'.format(self.plume_rise_pahts['temperature_sfc_dir']), + message_level=3) + catalog['temp_sfc'] = catalog.groupby('date_utc')['X', 'Y'].apply( + lambda x: get_sfc_value(x, self.plume_rise_pahts['temperature_sfc_dir'], 't2')) + self.logger.write_log('\t\tGetting friction velocity from {0}'.format( + self.plume_rise_pahts['friction_velocity_dir']), message_level=3) + catalog['friction_v'] = catalog.groupby('date_utc')['X', 'Y'].apply( + lambda x: get_sfc_value(x, self.plume_rise_pahts['friction_velocity_dir'], 'ustar')) + self.logger.write_log('\t\tGetting PBL height from {0}'.format( + self.plume_rise_pahts['pblh_dir']), message_level=3) + catalog['pbl'] = catalog.groupby('date_utc')['X', 'Y'].apply( + lambda x: get_sfc_value(x, self.plume_rise_pahts['pblh_dir'], 'mixed_layer_height')) + self.logger.write_log('\t\tGetting obukhov length from {0}'.format( + self.plume_rise_pahts['obukhov_length_dir']), message_level=3) + catalog['obukhov_len'] = catalog.groupby('date_utc')['X', 'Y'].apply( + lambda x: get_sfc_value(x, self.plume_rise_pahts['obukhov_length_dir'], 'rmol')) + catalog['obukhov_len'] = 1. 
/ catalog['obukhov_len'] + + self.logger.write_log('\t\tGetting layer thickness from {0}'.format( + self.plume_rise_pahts['layer_thickness_dir']), message_level=3) + catalog['layers'] = catalog.groupby('date_utc')['X', 'Y'].apply( + lambda x: get_layers(x, self.plume_rise_pahts['layer_thickness_dir'], 'layer_thickness')) + self.logger.write_log('\t\tGetting temperatue at the top from {0}'.format( + self.plume_rise_pahts['temperature_4d_dir']), message_level=3) + catalog['temp_top'] = catalog.groupby('date_utc')['X', 'Y', 'Height', 'layers', 'temp_sfc'].apply( + lambda x: get_temp_top(x, self.plume_rise_pahts['temperature_4d_dir'], 't')) + self.logger.write_log('\t\tGetting wind speed at 10 m', message_level=3) + catalog['wSpeed_10'] = catalog.groupby('date_utc')['X', 'Y'].apply( + lambda x: get_wind_speed_10m(x, self.plume_rise_pahts['u10_wind_speed_dir'], + self.plume_rise_pahts['v10_wind_speed_dir'], 'u10', 'v10')) + self.logger.write_log('\t\tGetting wind speed at the top', message_level=3) + catalog['wSpeed_top'] = catalog.groupby('date_utc')['X', 'Y', 'Height', 'layers', 'wSpeed_10'].apply( + lambda x: get_wind_speed_top(x, self.plume_rise_pahts['u_wind_speed_4d_dir'], + self.plume_rise_pahts['v_wind_speed_4d_dir'], 'u', 'v')) + catalog.drop(columns=['wSpeed_10', 'layers', 'X', 'Y'], inplace=True) + self.logger.write_time_log('PointSourceSector', 'get_plumerise_meteo', timeit.default_timer() - spent_time) + return catalog + + def get_plume_rise_top_bot(self, catalog): + spent_time = timeit.default_timer() + + catalog = self.get_plumerise_meteo(catalog) + + # Step 1: Bouyancy flux + catalog.loc[catalog['Temp'] <= catalog['temp_top'], 'Fb'] = 0 + catalog.loc[catalog['Temp'] > catalog['temp_top'], 'Fb'] = ((catalog['Temp'] - catalog['temp_top']) / catalog[ + 'Temp']) * ((catalog['Speed'] * np.square(catalog['Diameter'])) / 4.) * GRAVITY + + # Step 2: Stability parameter + catalog['S'] = np.maximum( + (GRAVITY / catalog['temp_top']) * (((catalog['temp_top'] - catalog['temp_sfc']) / catalog['Height']) + + (GRAVITY / CP)), + 0.047 / catalog['temp_top']) + + # Step 3: Plume thickness + catalog.reset_index(inplace=True) + neutral_atm = (catalog['obukhov_len'] > 2. * catalog['Height']) | ( + catalog['obukhov_len'] < -0.25 * catalog['Height']) + stable_atm = ((catalog['obukhov_len'] > 0) & (catalog['obukhov_len'] < 2 * catalog['Height'])) | ( + catalog['Height'] > catalog['pbl']) + unstable_atm = ((catalog['obukhov_len'] > -0.25 * catalog['Height']) & (catalog['obukhov_len'] < 0)) + + catalog.loc[neutral_atm, 'Ah'] = np.minimum( + 39 * (np.power(catalog['Fb'], 3. / 5.) / catalog['wSpeed_top']), + 1.2 * np.power(catalog['Fb'] / (catalog['wSpeed_top'] * np.square(catalog['friction_v'])), 3. / 5.) * + np.power(catalog['Height'] + (1.3 * (catalog['Fb'] / (catalog['wSpeed_top'] * np.square( + catalog['friction_v'])))), 2. / 5.)) + # catalog.loc[unstable_atm, 'Ah'] = 30. * np.power(catalog['Fb'] / catalog['wSpeed_top'], 3./5.) + catalog.loc[unstable_atm, 'Ah'] = np.minimum( + 3. * np.power(catalog['Fb'] / catalog['wSpeed_top'], 3. / 5.) * np.power( + -2.5 * np.power(catalog['friction_v'], 3.) / catalog['obukhov_len'], -2. / 5.), + 30. * np.power(catalog['Fb'] / catalog['wSpeed_top'], 3. / 5.)) + catalog.loc[stable_atm, 'Ah'] = 2.6 * np.power(catalog['Fb'] / (catalog['wSpeed_top'] * catalog['S']), 1. / 3.) 
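+        # Worked example with hypothetical stack values (illustration only, not part of the catalog):
+        # a source with Temp = 400 K, temp_top = 285 K, Speed = 10 m/s and Diameter = 2 m gives, in Step 1,
+        # Fb = ((400 - 285) / 400) * ((10 * 2 ** 2) / 4.) * GRAVITY ~= 28.2 m4/s3.
+        # For Step 3, with Height = 100 m the same source is classified as neutral when obukhov_len = 500 m
+        # (500 > 2 * 100), stable when obukhov_len = 80 m (0 < 80 < 2 * 100) or when Height > pbl, and
+        # unstable when obukhov_len = -20 m (-0.25 * 100 < -20 < 0).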
+ + # Step 4: Plume rise + catalog['h_top'] = (1.5 * catalog['Ah']) + catalog['Height'] + catalog['h_bot'] = (0.5 * catalog['Ah']) + catalog['Height'] + + catalog.drop(columns=['Height', 'Diameter', 'Speed', 'Temp', 'date_utc', 'temp_sfc', 'friction_v', 'pbl', + 'obukhov_len', 'temp_top', 'wSpeed_top', 'Fb', 'S', 'Ah'], inplace=True) + self.logger.write_time_log('PointSourceSector', 'get_plume_rise_top_bot', timeit.default_timer() - spent_time) + return catalog + + def set_layer(self, catalog): + spent_time = timeit.default_timer() + + # catalog.set_index(['Code', 'tstep'], inplace=True) + catalog['percent'] = 1. + catalog_by_layer = [] + last_layer = 0 + for layer, v_lev in enumerate(self.vertical_levels): + # filtering catalog + aux_catalog = catalog.loc[(catalog['percent'] > 0) & (catalog['h_bot'] < v_lev), :].copy() + + aux_catalog['aux_percent'] = (((v_lev - aux_catalog['h_bot']) * aux_catalog['percent']) / + (aux_catalog['h_top'] - aux_catalog['h_bot'])) + # inf are the ones that h_top == h_bot + aux_catalog['aux_percent'].replace(np.inf, 1., inplace=True) + # percentages higher than 'percent' are due to the ones that are the last layer + aux_catalog.loc[aux_catalog['aux_percent'] > aux_catalog['percent'], 'aux_percent'] = \ + aux_catalog['percent'] + + aux_catalog[self.source_pollutants] = aux_catalog[self.source_pollutants].multiply( + aux_catalog['aux_percent'], axis=0) + aux_catalog['layer'] = layer + + catalog.loc[aux_catalog.index, 'percent'] = aux_catalog['percent'] - aux_catalog['aux_percent'] + + catalog.loc[aux_catalog.index, 'h_bot'] = v_lev + + aux_catalog.drop(columns=['h_top', 'h_bot', 'percent', 'aux_percent'], inplace=True) + catalog_by_layer.append(aux_catalog) + last_layer = layer + + # catalog_by_layer = pd.concat(catalog_by_layer) + + unused = catalog.loc[catalog['percent'] > 0, :] + + # catalog_by_layer.set_index(['Code', 'tstep', 'layer'], inplace=True) + if len(unused) > 0: + warn('WARNING: Some point sources have to allocate the emissions higher than the last vertical level:\n' + + '{0}'.format(unused.loc[:, ['Code', 'tstep', 'h_top']])) + unused['layer'] = last_layer + # unused.set_index(['Code', 'tstep', 'layer'], inplace=True) + unused[self.source_pollutants] = unused[self.source_pollutants].multiply(unused['percent'], axis=0) + unused.drop(columns=['h_top', 'h_bot', 'percent'], inplace=True) + catalog_by_layer.append(unused) + + catalog_by_layer = pd.concat(catalog_by_layer) + catalog_by_layer.set_index(['Code', 'tstep', 'layer'], inplace=True) + + new_catalog = catalog_by_layer[~catalog_by_layer.index.duplicated(keep='first')] + new_catalog[self.source_pollutants] = catalog_by_layer.groupby(['Code', 'tstep', 'layer'])[ + self.source_pollutants].sum() + self.logger.write_time_log('PointSourceSector', 'set_layer', timeit.default_timer() - spent_time) + return new_catalog + + def calculate_vertical_distribution(self, catalog): + """ + Add the layer column to indicate at what layer the emission have to go. + + :param catalog: Catalog to calculate. + :type catalog: DataFrame + + :return: Catalog with the level. 
+ :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + if self.plume_rise: + + catalog = self.get_plume_rise_top_bot(catalog) + catalog = self.set_layer(catalog) + + else: + catalog['Height'] = catalog['Height'] * 1.2 + + catalog['layer'] = np.searchsorted(self.vertical_levels, catalog['Height'], side='left') + + catalog.drop('Height', axis=1, inplace=True) + catalog.reset_index(inplace=True) + catalog.set_index(['Code', 'tstep', 'layer'], inplace=True) + + self.logger.write_time_log('PointSourceSector', 'calculate_vertical_distribution', + timeit.default_timer() - spent_time) + return catalog + + def add_measured_emissions(self, catalog): + spent_time = timeit.default_timer() + + def func(x, pollutant): + measured_emissions = self.measured_path.replace('', x.name) + measured_emissions = pd.read_csv(measured_emissions, sep=';') + measured_emissions = measured_emissions.loc[measured_emissions['Code'] == x.name, :] + + measured_emissions['date'] = pd.to_datetime(measured_emissions['date']) + pd.to_timedelta( + measured_emissions['local_to_UTC'], unit='h') + + measured_emissions.drop('local_to_UTC', axis=1, inplace=True) + + code = x.name + x = pd.DataFrame(x) + x.rename(columns={code: 'date'}, inplace=True) + + test = pd.merge(left=x, right=measured_emissions.loc[:, ['date', pollutant]], on='date', how='inner') + + try: + test.set_index(x.index, inplace=True) + except ValueError: + raise IOError('No measured emissions for the selected dates: {0}'.format(x.values)) + + return test[pollutant] + + for pollutant in self.source_pollutants: + catalog[pollutant] = catalog.groupby('Code')['date'].apply(lambda x: func(x, pollutant)) + + self.logger.write_time_log('PointSourceSector', 'add_measured_emissions', timeit.default_timer() - spent_time) + return catalog + + def calculate_measured_emissions(self, catalog): + spent_time = timeit.default_timer() + + if len(catalog) == 0: + catalog = None + else: + catalog = self.to_geodataframe(catalog) + catalog = self.add_dates(catalog) + catalog = self.add_measured_emissions(catalog) + + catalog.set_index(['Code', 'tstep'], inplace=True) + self.logger.write_time_log('PointSourceSector', 'calculate_measured_emissions', + timeit.default_timer() - spent_time) + return catalog + + def merge_catalogs(self, catalog_list): + spent_time = timeit.default_timer() + + catalog = pd.concat(catalog_list) + self.logger.write_time_log('PointSourceSector', 'merge_catalogs', timeit.default_timer() - spent_time) + return catalog + + def speciate(self, dataframe, code='default'): + spent_time = timeit.default_timer() + self.logger.write_log('\t\tSpeciating {0} emissions'.format(code), message_level=2) + + new_dataframe = gpd.GeoDataFrame(index=dataframe.index, data=None, crs=dataframe.crs, + geometry=dataframe.geometry) + for out_pollutant in self.output_pollutants: + input_pollutant = self.speciation_map[out_pollutant] + if input_pollutant == 'nmvoc' and input_pollutant in dataframe.columns.values: + self.logger.write_log("\t\t\t{0} = {4}*({1}/{2})*{3}".format( + out_pollutant, input_pollutant, self.molecular_weights[input_pollutant], + self.speciation_profile.loc[code, out_pollutant], + self.speciation_profile.loc[code, 'VOCtoTOG']), message_level=3) + new_dataframe[out_pollutant] = \ + self.speciation_profile.loc[code, 'VOCtoTOG'] * ( + dataframe[input_pollutant] / + self.molecular_weights[input_pollutant]) * self.speciation_profile.loc[code, out_pollutant] + else: + if out_pollutant != 'PMC': + self.logger.write_log("\t\t\t{0} = ({1}/{2})*{3}".format( + 
out_pollutant, input_pollutant, + self.molecular_weights[input_pollutant], + self.speciation_profile.loc[code, out_pollutant]), message_level=3) + if input_pollutant in dataframe.columns.values: + new_dataframe[out_pollutant] = (dataframe[input_pollutant] / + self.molecular_weights[input_pollutant]) * \ + self.speciation_profile.loc[code, out_pollutant] + else: + self.logger.write_log("\t\t\t{0} = ({1}/{2} - {4}/{5})*{3}".format( + out_pollutant, 'pm10', self.molecular_weights['pm10'], + self.speciation_profile.loc[code, out_pollutant], 'pm25', self.molecular_weights['pm25']), + message_level=3) + + new_dataframe[out_pollutant] = \ + ((dataframe['pm10'] / self.molecular_weights['pm10']) - + (dataframe['pm25'] / self.molecular_weights['pm25'])) * \ + self.speciation_profile.loc[code, out_pollutant] + self.logger.write_time_log('Sector', 'speciate', timeit.default_timer() - spent_time) + return new_dataframe + + def point_source_to_fid(self, catalog): + catalog.reset_index(inplace=True) + catalog = catalog.to_crs(self.grid_shp.crs) + + catalog = gpd.sjoin(catalog, self.grid_shp.reset_index(), how="inner", op='intersects') + + # Drops duplicates when the point source is on the boundary of the cell + catalog = catalog[~catalog.index.duplicated(keep='first')] + + columns_to_drop = ['Code', 'index_right', 'index'] + for del_col in columns_to_drop: + if del_col in catalog.columns.values: + catalog.drop(columns=[del_col], inplace=True) + + catalog = catalog.groupby(['FID', 'layer', 'tstep']).sum() + + return catalog + + def calculate_emissions(self): + spent_time = timeit.default_timer() + self.logger.write_log('\tCalculating emissions') + + emissions = self.add_dates(self.catalog, drop_utc=False) + emissions = self.calculate_hourly_emissions(emissions) + + if self.comm.Get_rank() == 0: + emissions_measured = self.calculate_measured_emissions(self.catalog_measured) + else: + emissions_measured = None + + if emissions_measured is not None: + emissions = self.merge_catalogs([emissions, emissions_measured]) + emissions = self.calculate_vertical_distribution(emissions) + + emis_list = [] + for spec, spec_df in emissions.groupby('P_spec'): + emis_list.append(self.speciate(spec_df, spec)) + emissions = pd.concat(emis_list) + + emissions = self.point_source_to_fid(emissions) + # From kmol/h or kg/h to mol/h or g/h + emissions = emissions.mul(1000.0) + + self.logger.write_log('\t\tPoint sources emissions calculated', message_level=2) + self.logger.write_time_log('PointSourceSector', 'calculate_emissions', timeit.default_timer() - spent_time) + + return emissions diff --git a/hermesv3_bu/sectors/recreational_boats_sector.py b/hermesv3_bu/sectors/recreational_boats_sector.py new file mode 100755 index 0000000000000000000000000000000000000000..59177fcfa3cf83a97ce0f5c2d911fd443383eb79 --- /dev/null +++ b/hermesv3_bu/sectors/recreational_boats_sector.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python + +import os +import timeit + +import numpy as np +import pandas as pd +import geopandas as gpd + +from hermesv3_bu.sectors.sector import Sector +from hermesv3_bu.io_server.io_shapefile import IoShapefile +from hermesv3_bu.io_server.io_raster import IoRaster +from hermesv3_bu.logger.log import Log + + +class RecreationalBoatsSector(Sector): + def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + boat_list, density_map_path, boats_data_path, ef_file_path, monthly_profiles_path, + weekly_profiles_path, hourly_profiles_path, speciation_map_path, 
speciation_profiles_path, + molecular_weights_path): + spent_time = timeit.default_timer() + + super(RecreationalBoatsSector, self).__init__( + comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path) + + self.boat_list = boat_list + + # TODO Change nodata value of the raster + self.density_map = self.create_density_map(density_map_path) + self.boats_data_path = boats_data_path + self.ef_file_path = ef_file_path + + self.logger.write_time_log('RecreationalBoatsSector', '__init__', timeit.default_timer() - spent_time) + + def create_density_map(self, density_map_path): + spent_time = timeit.default_timer() + if self.comm.Get_rank() == 0: + density_map_auxpath = os.path.join(self.auxiliary_dir, 'recreational_boats', 'density_map.shp') + if not os.path.exists(density_map_auxpath): + src_density_map = IoRaster(self.comm).to_shapefile_serie(density_map_path, nodata=0) + src_density_map = src_density_map.loc[src_density_map['data'] > 0] + src_density_map['data'] = src_density_map['data'] / src_density_map['data'].sum() + src_density_map.to_crs(self.grid_shp.crs, inplace=True) + src_density_map['src_inter_fraction'] = src_density_map.area + src_density_map = self.spatial_overlays(src_density_map, self.grid_shp.reset_index(), + how='intersection') + src_density_map['src_inter_fraction'] = src_density_map.area / src_density_map['src_inter_fraction'] + + src_density_map['data'] = src_density_map.loc[:, 'data'].multiply(src_density_map["src_inter_fraction"], + axis="index") + + src_density_map = src_density_map.loc[:, ['FID', 'data']].groupby('FID').sum() + src_density_map = gpd.GeoDataFrame(src_density_map, crs=self.grid_shp.crs, + geometry=self.grid_shp.loc[src_density_map.index, 'geometry']) + src_density_map.reset_index(inplace=True) + + IoShapefile(self.comm).write_shapefile_serial(src_density_map, density_map_auxpath) + else: + src_density_map = IoShapefile(self.comm).read_shapefile_serial(density_map_auxpath) + else: + src_density_map = None + src_density_map = IoShapefile(self.comm).split_shapefile(src_density_map) + src_density_map.set_index('FID', inplace=True) + + self.logger.write_time_log('RecreationalBoatsSector', 'create_density_map', timeit.default_timer() - spent_time) + return src_density_map + + def speciate_dict(self, annual_emissions_dict): + spent_time = timeit.default_timer() + + speciated_emissions = {} + for out_pollutant in self.output_pollutants: + if out_pollutant != 'PMC': + self.logger.write_log("\t\t\t{0} = ({1}/{2})*{3}".format( + out_pollutant, self.speciation_map[out_pollutant], + self.molecular_weights[self.speciation_map[out_pollutant]], + self.speciation_profile.loc['default', out_pollutant]), message_level=3) + + speciated_emissions[out_pollutant] = (annual_emissions_dict[self.speciation_map[out_pollutant]] / + self.molecular_weights[self.speciation_map[out_pollutant]] + ) * self.speciation_profile.loc['default', out_pollutant] + else: + self.logger.write_log("\t\t\t{0} = ({1}/{2} - {4}/{5})*{3}".format( + out_pollutant, 'pm10', self.molecular_weights['pm10'], + self.speciation_profile.loc['default', out_pollutant], 'pm25', self.molecular_weights['pm25']), + message_level=3) + speciated_emissions[out_pollutant] = ((annual_emissions_dict['pm10'] / self.molecular_weights['pm10']) - + (annual_emissions_dict['pm25'] / self.molecular_weights['pm25']) + ) * 
self.speciation_profile.loc['default', out_pollutant] + self.logger.write_time_log('RecreationalBoatsSector', 'speciate_dict', timeit.default_timer() - spent_time) + return speciated_emissions + + def get_annual_emissions(self): + spent_time = timeit.default_timer() + + emissions_dict = {} + + data = pd.read_csv(self.boats_data_path, usecols=['code', 'number', 'nominal_power', 'Ann_hours', 'LF']) + data['AF'] = data['number'] * data['Ann_hours'] * data['nominal_power'] * data['LF'] + # Emission Factor in g/kWh + ef_dataframe = pd.read_csv(self.ef_file_path) + dataframe = pd.merge(data.loc[:, ['code', 'AF']], ef_dataframe, on='code') + for in_p in self.source_pollutants: + emissions_dict[in_p] = dataframe['AF'].multiply(dataframe['EF_{0}'.format(in_p)]).sum() + + self.logger.write_time_log('RecreationalBoatsSector', 'get_annual_emissions', + timeit.default_timer() - spent_time) + return emissions_dict + + def calculate_yearly_emissions(self, annual_emissions): + spent_time = timeit.default_timer() + + new_dataframe = self.density_map.copy() + new_dataframe.drop(columns='data', inplace=True) + + for pollutant, annual_value in annual_emissions.iteritems(): + new_dataframe[pollutant] = self.density_map['data'] * annual_value + + self.logger.write_time_log('RecreationalBoatsSector', 'calculate_yearly_emissions', + timeit.default_timer() - spent_time) + return new_dataframe + + def dates_to_month_weekday_hour(self, dataframe): + spent_time = timeit.default_timer() + dataframe['month'] = dataframe['date'].dt.month + dataframe['weekday'] = dataframe['date'].dt.weekday + dataframe['hour'] = dataframe['date'].dt.hour + + self.logger.write_time_log('RecreationalBoatsSector', 'dates_to_month_weekday_hour', + timeit.default_timer() - spent_time) + return dataframe + + def calculate_hourly_emissions(self, annual_distribution): + spent_time = timeit.default_timer() + + def get_mf(df): + month_factor = self.monthly_profiles.loc['default', df.name] + + df['MF'] = month_factor + return df.loc[:, ['MF']] + + def get_wf(df): + weekly_profile = self.calculate_rebalanced_weekly_profile(self.weekly_profiles.loc['default', :].to_dict(), + df.name) + df['WF'] = weekly_profile[df.name.weekday()] + return df.loc[:, ['WF']] + + def get_hf(df): + hourly_profile = self.hourly_profiles.loc['default', :].to_dict() + hour_factor = hourly_profile[df.name] + + df['HF'] = hour_factor + return df.loc[:, ['HF']] + + dataframe = self.add_dates(annual_distribution) + dataframe = self.dates_to_month_weekday_hour(dataframe) + + dataframe['date_as_date'] = dataframe['date'].dt.date + + dataframe['MF'] = dataframe.groupby('month').apply(get_mf) + dataframe[self.output_pollutants] = dataframe[self.output_pollutants].multiply(dataframe['MF'], axis=0) + dataframe.drop(columns=['month', 'MF'], inplace=True) + + dataframe['WF'] = dataframe.groupby('date_as_date').apply(get_wf) + dataframe[self.output_pollutants] = dataframe[self.output_pollutants].multiply(dataframe['WF'], axis=0) + dataframe.drop(columns=['weekday', 'date', 'date_as_date', 'WF'], inplace=True) + + dataframe['HF'] = dataframe.groupby('hour').apply(get_hf) + dataframe[self.output_pollutants] = dataframe[self.output_pollutants].multiply(dataframe['HF'], axis=0) + dataframe.drop(columns=['hour', 'HF'], inplace=True) + + self.logger.write_time_log('RecreationalBoatsSector', 'calculate_hourly_emissions', + timeit.default_timer() - spent_time) + return dataframe + + def calculate_emissions(self): + spent_time = timeit.default_timer() + self.logger.write_log('\tCalculating 
emissions') + + annual_emissions = self.get_annual_emissions() + annual_emissions = self.speciate_dict(annual_emissions) + + distribution = self.calculate_yearly_emissions(annual_emissions) + distribution = self.calculate_hourly_emissions(distribution) + distribution.drop(columns=['geometry'], inplace=True) + distribution['layer'] = 0 + distribution.set_index(['FID', 'layer', 'tstep'], inplace=True) + self.logger.write_log('\t\tRecreational boats emissions calculated', message_level=2) + self.logger.write_time_log('RecreationalBoatsSector', 'calculate_emissions', + timeit.default_timer() - spent_time) + return distribution diff --git a/hermesv3_bu/sectors/residential_sector.py b/hermesv3_bu/sectors/residential_sector.py new file mode 100755 index 0000000000000000000000000000000000000000..fdd12479b6002f20f3da2d05dcea02d313dcd2fc --- /dev/null +++ b/hermesv3_bu/sectors/residential_sector.py @@ -0,0 +1,454 @@ +#!/usr/bin/env python + +import sys +import os +import timeit + +import numpy as np +import pandas as pd +import geopandas as gpd + +from hermesv3_bu.sectors.sector import Sector +from hermesv3_bu.io_server.io_raster import IoRaster +from hermesv3_bu.io_server.io_shapefile import IoShapefile +from hermesv3_bu.io_server.io_netcdf import IoNetcdf +from hermesv3_bu.logger.log import Log + + +class ResidentialSector(Sector): + def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + fuel_list, prov_shapefile, ccaa_shapefile, population_density_map, population_type_map, + population_type_by_ccaa, population_type_by_prov, energy_consumption_by_prov, + energy_consumption_by_ccaa, residential_spatial_proxies, residential_ef_files_path, + heating_degree_day_path, temperature_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path): + spent_time = timeit.default_timer() + + super(ResidentialSector, self).__init__( + comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + None, None, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path) + + # Heating Degree Day constants + self.hdd_base_temperature = 15.5 + self.hdd_f_biomass = 0.0 + self.hdd_f_others = 0.2 + + self.fuel_list = fuel_list + self.day_dict = self.calculate_num_days() + + self.pop_type_by_prov = population_type_by_prov + self.pop_type_by_ccaa = population_type_by_ccaa + + self.energy_consumption_by_prov = pd.read_csv(energy_consumption_by_prov) + self.energy_consumption_by_ccaa = pd.read_csv(energy_consumption_by_ccaa) + self.residential_spatial_proxies = self.read_residential_spatial_proxies(residential_spatial_proxies) + self.ef_profiles = self.read_ef_file(residential_ef_files_path) + + if self.comm.Get_rank() == 0: + self.fuel_distribution = self.get_fuel_distribution(prov_shapefile, ccaa_shapefile, population_density_map, + population_type_map, create_pop_csv=False) + else: + self.fuel_distribution = None + self.fuel_distribution = IoShapefile(self.comm).split_shapefile(self.fuel_distribution) + + self.heating_degree_day_path = heating_degree_day_path + self.temperature_path = temperature_path + + self.logger.write_time_log('ResidentialSector', '__init__', timeit.default_timer() - spent_time) + + def read_ef_file(self, path): + """ + Read the emission factor file. + Units = g/GJ + + :param path: Path to the file that contains the emission factors (EF). + :type path: str + + :return: Dataframe with the emission factors read. 
+ :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + df_ef = pd.read_csv(path) + df_ef = df_ef.loc[df_ef['fuel_type'].isin(self.fuel_list), ['fuel_type'] + self.source_pollutants] + + self.logger.write_time_log('ResidentialSector', 'read_ef_file', timeit.default_timer() - spent_time) + return df_ef + + def calculate_num_days(self): + spent_time = timeit.default_timer() + + day_array = [hour.date() for hour in self.date_array] + days, num_days = np.unique(day_array, return_counts=True) + + day_dict = {} + for key, value in zip(days, num_days): + day_dict[key] = value + + self.logger.write_time_log('ResidentialSector', 'calculate_num_days', timeit.default_timer() - spent_time) + return day_dict + + def read_residential_spatial_proxies(self, path): + spent_time = timeit.default_timer() + + spatial_proxies = pd.read_csv(path) + spatial_proxies = spatial_proxies.loc[spatial_proxies['fuel_type'].isin(self.fuel_list), :] + + self.logger.write_time_log('ResidentialSector', 'read_residential_spatial_proxies', + timeit.default_timer() - spent_time) + return spatial_proxies + + def get_spatial_proxy(self, fuel_type): + spent_time = timeit.default_timer() + + proxy = self.residential_spatial_proxies.loc[self.residential_spatial_proxies['fuel_type'] == fuel_type, + ['nuts_level', 'proxy']].values[0] + + if proxy[0] == 3: + nut_level = 'prov' + elif proxy[0] == 2: + nut_level = 'ccaa' + else: + nut_level = proxy[0] + + if proxy[1] == 'urban': + proxy_type = 3 + elif proxy[1] == 'rural': + proxy_type = 1 + else: + proxy_type = proxy[1] + + self.logger.write_time_log('ResidentialSector', 'get_spatial_proxy', timeit.default_timer() - spent_time) + return {'nut_level': nut_level, 'proxy_type': proxy_type} + + def to_dst_resolution(self, src_distribution): + spent_time = timeit.default_timer() + + src_distribution.to_crs(self.grid_shp.crs, inplace=True) + src_distribution.to_file(os.path.join(self.auxiliary_dir, 'residential', 'fuel_distribution_src.shp')) + src_distribution['src_inter_fraction'] = src_distribution.geometry.area + src_distribution = self.spatial_overlays(src_distribution, self.grid_shp.reset_index(), how='intersection') + src_distribution.to_file(os.path.join(self.auxiliary_dir, 'residential', 'fuel_distribution_raw.shp')) + src_distribution['src_inter_fraction'] = src_distribution.geometry.area / src_distribution[ + 'src_inter_fraction'] + + src_distribution[self.fuel_list] = src_distribution.loc[:, self.fuel_list].multiply( + src_distribution["src_inter_fraction"], axis="index") + + src_distribution = src_distribution.loc[:, self.fuel_list + ['FID']].groupby('FID').sum() + src_distribution = gpd.GeoDataFrame(src_distribution, crs=self.grid_shp.crs, + geometry=self.grid_shp.loc[src_distribution.index, 'geometry']) + src_distribution.reset_index(inplace=True) + + self.logger.write_time_log('ResidentialSector', 'to_dst_resolution', timeit.default_timer() - spent_time) + return src_distribution + + def get_fuel_distribution(self, prov_shapefile, ccaa_shapefile, population_density_map, population_type_map, + create_pop_csv=False): + spent_time = timeit.default_timer() + self.logger.write_log('Calculating fuel distribution', message_level=2) + fuel_distribution_path = os.path.join(self.auxiliary_dir, 'residential', 'fuel_distribution.shp') + + if not os.path.exists(fuel_distribution_path): + + population_density = IoRaster(self.comm).clip_raster_with_shapefile_poly( + population_density_map, self.clip.shapefile, + os.path.join(self.auxiliary_dir, 'residential', 
'population_density.tif')) + population_density = IoRaster(self.comm).to_shapefile_serie(population_density) + + population_density.rename(columns={'data': 'pop'}, inplace=True) + + population_type = IoRaster(self.comm).clip_raster_with_shapefile_poly( + population_type_map, self.clip.shapefile, + os.path.join(self.auxiliary_dir, 'residential', 'population_type.tif')) + population_type = IoRaster(self.comm).to_shapefile_serie(population_type) + population_type.rename(columns={'data': 'type'}, inplace=True) + + population_density['type'] = population_type['type'] + population_density.loc[population_density['type'] == 2, 'type'] = 3 + + population_density = self.add_nut_code(population_density, prov_shapefile, nut_value='ORDER07') + population_density.rename(columns={'nut_code': 'prov'}, inplace=True) + + population_density = population_density.loc[population_density['prov'] != -999, :] + population_density = self.add_nut_code(population_density, ccaa_shapefile, nut_value='ORDER06') + population_density.rename(columns={'nut_code': 'ccaa'}, inplace=True) + population_density = population_density.loc[population_density['ccaa'] != -999, :] + + if create_pop_csv: + population_density.loc[:, ['prov', 'pop', 'type']].groupby(['prov', 'type']).sum().reset_index().to_csv( + self.pop_type_by_prov) + population_density.loc[:, ['ccaa', 'pop', 'type']].groupby(['ccaa', 'type']).sum().reset_index().to_csv( + self.pop_type_by_ccaa) + + self.pop_type_by_ccaa = pd.read_csv(self.pop_type_by_ccaa).set_index(['ccaa', 'type']) + self.pop_type_by_prov = pd.read_csv(self.pop_type_by_prov).set_index(['prov', 'type']) + + fuel_distribution = population_density.loc[:, ['CELL_ID', 'geometry']].copy() + + for fuel in self.fuel_list: + fuel_distribution[fuel] = 0 + + spatial_proxy = self.get_spatial_proxy(fuel) + + if spatial_proxy['nut_level'] == 'ccaa': + for ccaa in np.unique(population_density['ccaa']): + if spatial_proxy['proxy_type'] == 'all': + total_pop = self.pop_type_by_ccaa.loc[ + self.pop_type_by_ccaa.index.get_level_values('ccaa') == ccaa, 'pop'].sum() + energy_consumption = self.energy_consumption_by_ccaa.loc[ + self.energy_consumption_by_ccaa['code'] == ccaa, fuel].values[0] + + fuel_distribution.loc[ + population_density['ccaa'] == ccaa, fuel] = population_density['pop'].multiply( + energy_consumption / total_pop) + else: + total_pop = self.pop_type_by_ccaa.loc[ + (self.pop_type_by_ccaa.index.get_level_values('ccaa') == ccaa) & + (self.pop_type_by_ccaa.index.get_level_values('type') == spatial_proxy['proxy_type']), + 'pop'].values[0] + energy_consumption = self.energy_consumption_by_ccaa.loc[ + self.energy_consumption_by_ccaa['code'] == ccaa, fuel].values[0] + + fuel_distribution.loc[(population_density['ccaa'] == ccaa) & + (population_density['type'] == spatial_proxy['proxy_type']), + fuel] = population_density['pop'].multiply( + energy_consumption / total_pop) + if spatial_proxy['nut_level'] == 'prov': + for prov in np.unique(population_density['prov']): + if spatial_proxy['proxy_type'] == 'all': + total_pop = self.pop_type_by_prov.loc[self.pop_type_by_prov.index.get_level_values( + 'prov') == prov, 'pop'].sum() + energy_consumption = self.energy_consumption_by_prov.loc[ + self.energy_consumption_by_prov['code'] == prov, fuel].values[0] + + fuel_distribution.loc[population_density['prov'] == prov, fuel] = population_density[ + 'pop'].multiply(energy_consumption / total_pop) + else: + total_pop = self.pop_type_by_prov.loc[ + (self.pop_type_by_prov.index.get_level_values('prov') == prov) & + 
(self.pop_type_by_prov.index.get_level_values('type') == spatial_proxy['proxy_type']), + 'pop'].values[0] + energy_consumption = self.energy_consumption_by_prov.loc[ + self.energy_consumption_by_prov['code'] == prov, fuel].values[0] + + fuel_distribution.loc[(population_density['prov'] == prov) & + (population_density['type'] == spatial_proxy['proxy_type']), + fuel] = population_density['pop'].multiply( + energy_consumption / total_pop) + fuel_distribution = self.to_dst_resolution(fuel_distribution) + IoShapefile(self.comm).write_shapefile_serial(fuel_distribution, fuel_distribution_path) + else: + fuel_distribution = IoShapefile(self.comm).read_shapefile_serial(fuel_distribution_path) + + self.logger.write_time_log('ResidentialSector', 'get_fuel_distribution', timeit.default_timer() - spent_time) + return fuel_distribution + + def calculate_daily_distribution(self, day): + import calendar + spent_time = timeit.default_timer() + + if calendar.isleap(day.year): + num_days = 366 + else: + num_days = 365 + + geometry_shp = self.fuel_distribution.loc[:, ['FID', 'geometry']].to_crs({'init': 'epsg:4326'}) + geometry_shp['c_lat'] = geometry_shp.centroid.y + geometry_shp['c_lon'] = geometry_shp.centroid.x + geometry_shp['centroid'] = geometry_shp.centroid + geometry_shp.drop(columns='geometry', inplace=True) + + meteo = IoNetcdf(self.comm).get_data_from_netcdf( + os.path.join(self.temperature_path, 'tas_{0}{1}.nc'.format(day.year, str(day.month).zfill(2))), + 'tas', 'daily', day, geometry_shp) + # From K to Celsius degrees + meteo['tas'] = meteo['tas'] - 273.15 + + # HDD(x,y,d) = max(Tb - Tout(x,y,d), 1) + meteo['hdd'] = np.maximum(self.hdd_base_temperature - meteo['tas'], 1) + meteo.drop('tas', axis=1, inplace=True) + + meteo['hdd_mean'] = IoNetcdf(self.comm).get_data_from_netcdf(self.heating_degree_day_path.replace( + '', str(day.year)), 'HDD', 'yearly', day, geometry_shp).loc[:, 'HDD'] + + daily_distribution = self.fuel_distribution.copy() + + daily_distribution = daily_distribution.to_crs({'init': 'epsg:4326'}) + daily_distribution['centroid'] = daily_distribution.centroid + + daily_distribution['REC'] = daily_distribution.apply( + self.nearest, geom_union=meteo.unary_union, df1=daily_distribution, df2=meteo, geom1_col='centroid', + src_column='REC', axis=1) + daily_distribution = pd.merge(daily_distribution, meteo, how='left', on='REC') + + daily_distribution.drop(columns=['centroid', 'REC', 'geometry_y'], axis=1, inplace=True) + daily_distribution.rename(columns={'geometry_x': 'geometry'}, inplace=True) + + for fuel in self.fuel_list: + # Selection of factor for HDD as a function of fuel type + if fuel.startswith('B_'): + hdd_f = self.hdd_f_biomass + else: + hdd_f = self.hdd_f_others + + daily_distribution.loc[:, fuel] = daily_distribution[fuel].multiply( + (daily_distribution['hdd'] + hdd_f * daily_distribution['hdd_mean']) / + (num_days*((1 + hdd_f)*daily_distribution['hdd_mean'])) + ) + + daily_distribution.drop(['hdd', 'hdd_mean'], axis=1, inplace=True) + + self.logger.write_time_log('ResidentialSector', 'calculate_daily_distribution', + timeit.default_timer() - spent_time) + return daily_distribution + + def get_fuel_distribution_by_day(self): + spent_time = timeit.default_timer() + + daily_distribution = {} + for day in self.day_dict.keys(): + daily_distribution[day] = self.calculate_daily_distribution(day) + + self.logger.write_time_log('ResidentialSector', 'get_fuel_distribution_by_day', + timeit.default_timer() - spent_time) + return daily_distribution + + def 
calculate_hourly_distribution(self, fuel_distribution): + spent_time = timeit.default_timer() + + fuel_distribution['hour'] = fuel_distribution['date'].dt.hour + for fuel in self.fuel_list: + if fuel.startswith('B_'): + fuel_distribution.loc[:, fuel] = fuel_distribution.groupby('hour')[fuel].apply( + lambda x: x.multiply(self.hourly_profiles.loc['biomass', x.name]) + ) + else: + fuel_distribution.loc[:, fuel] = fuel_distribution.groupby('hour')[fuel].apply( + lambda x: x.multiply(self.hourly_profiles.loc['others', x.name]) + ) + fuel_distribution.drop('hour', axis=1, inplace=True) + + self.logger.write_time_log('ResidentialSector', 'calculate_hourly_distribution', + timeit.default_timer() - spent_time) + return fuel_distribution + + def add_dates(self, df_by_day): + spent_time = timeit.default_timer() + + df_list = [] + for tstep, date in enumerate(self.date_array): + df_aux = df_by_day[date.date()].copy() + df_aux['date'] = pd.to_datetime(date, utc=True) + df_aux['date_utc'] = pd.to_datetime(date, utc=True) + df_aux['tstep'] = tstep + # df_aux = self.to_timezone(df_aux) + df_list.append(df_aux) + dataframe_by_day = pd.concat(df_list, ignore_index=True) + + dataframe_by_day = self.to_timezone(dataframe_by_day) + self.logger.write_time_log('ResidentialSector', 'add_dates', timeit.default_timer() - spent_time) + + return dataframe_by_day + + def calculate_fuel_distribution_by_hour(self): + spent_time = timeit.default_timer() + + self.fuel_distribution = self.add_timezone(self.fuel_distribution) + + fuel_distribution_by_day = self.get_fuel_distribution_by_day() + + fuel_distribution_by_day = self.add_dates(fuel_distribution_by_day) + + fuel_distribution = self.calculate_hourly_distribution(fuel_distribution_by_day) + + self.logger.write_time_log('ResidentialSector', 'calculate_fuel_distribution_by_hour', + timeit.default_timer() - spent_time) + return fuel_distribution + + def calculate_emissions_from_fuel_distribution(self, fuel_distribution): + spent_time = timeit.default_timer() + + emissions = fuel_distribution.loc[:, ['date', 'date_utc', 'tstep', 'geometry']].copy() + for in_p in self.source_pollutants: + emissions[in_p] = 0 + for i, fuel_type_ef in self.ef_profiles.iterrows(): + emissions[in_p] += fuel_distribution.loc[:, fuel_type_ef['fuel_type']].multiply(fuel_type_ef[in_p]) + self.logger.write_time_log('ResidentialSector', 'calculate_fuel_distribution_by_hour', + timeit.default_timer() - spent_time) + + return emissions + + def calculate_output_emissions_from_fuel_distribution(self, fuel_distribution): + spent_time = timeit.default_timer() + + emissions = fuel_distribution.loc[:, ['FID', 'date', 'date_utc', 'tstep', 'geometry']].copy() + for out_p in self.output_pollutants: + emissions[out_p] = 0 + if out_p == 'PMC': + pm10_df = None + for i, fuel_type_ef in self.ef_profiles.iterrows(): + if fuel_type_ef['fuel_type'].startswith('B_'): + speciation_factor = self.speciation_profile.loc['biomass', out_p] + else: + speciation_factor = self.speciation_profile.loc['others', out_p] + + if pm10_df is None: + pm10_df = fuel_distribution.loc[:, fuel_type_ef['fuel_type']].multiply( + fuel_type_ef['pm10'] * speciation_factor) + else: + pm10_df += fuel_distribution.loc[:, fuel_type_ef['fuel_type']].multiply( + fuel_type_ef['pm10'] * speciation_factor) + pm10_df.divide(self.molecular_weights['pm10']) + + pm25_df = None + for i, fuel_type_ef in self.ef_profiles.iterrows(): + if fuel_type_ef['fuel_type'].startswith('B_'): + speciation_factor = self.speciation_profile.loc['biomass', out_p] + else: + 
speciation_factor = self.speciation_profile.loc['others', out_p] + + if pm25_df is None: + pm25_df = fuel_distribution.loc[:, fuel_type_ef['fuel_type']].multiply( + fuel_type_ef['pm25'] * speciation_factor) + else: + pm25_df += fuel_distribution.loc[:, fuel_type_ef['fuel_type']].multiply( + fuel_type_ef['pm25'] * speciation_factor) + pm25_df.divide(self.molecular_weights['pm25']) + + emissions[out_p] = pm10_df - pm25_df + else: + in_p = self.speciation_map[out_p] + in_df = None + for i, fuel_type_ef in self.ef_profiles.iterrows(): + if fuel_type_ef['fuel_type'].startswith('B_'): + speciation_factor = self.speciation_profile.loc['biomass', out_p] + else: + speciation_factor = self.speciation_profile.loc['others', out_p] + + if in_df is None: + in_df = fuel_distribution.loc[:, fuel_type_ef['fuel_type']].multiply( + fuel_type_ef[in_p] * speciation_factor) + else: + in_df += fuel_distribution.loc[:, fuel_type_ef['fuel_type']].multiply( + fuel_type_ef[in_p] * speciation_factor) + emissions[out_p] = in_df.divide(self.molecular_weights[in_p]) + + self.logger.write_time_log('ResidentialSector', 'calculate_output_emissions_from_fuel_distribution', + timeit.default_timer() - spent_time) + return emissions + + def calculate_emissions(self): + spent_time = timeit.default_timer() + self.logger.write_log('\tCalculating emissions') + + fuel_distribution_by_hour = self.calculate_fuel_distribution_by_hour() + emissions = self.calculate_output_emissions_from_fuel_distribution(fuel_distribution_by_hour) + emissions.drop(columns=['date', 'date_utc', 'geometry'], inplace=True) + emissions['layer'] = 0 + emissions.set_index(['FID', 'layer', 'tstep'], inplace=True) + + self.logger.write_log('\t\tResidential emissions calculated', message_level=2) + self.logger.write_time_log('ResidentialSector', 'calculate_emissions', timeit.default_timer() - spent_time) + return emissions diff --git a/hermesv3_bu/sectors/sector.py b/hermesv3_bu/sectors/sector.py new file mode 100755 index 0000000000000000000000000000000000000000..b2dc6ab862f3f20bf8fc5899c63ed51221a5772b --- /dev/null +++ b/hermesv3_bu/sectors/sector.py @@ -0,0 +1,561 @@ +#!/usr/bin/env python + +import sys +import os +import timeit +from hermesv3_bu.logger.log import Log +import numpy as np +import pandas as pd +import geopandas as gpd +from mpi4py import MPI + + +class Sector(object): + + def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path): + """ + Initialize the main sector class with the common arguments and methods. + + :param comm: Communicator for the sector calculation. + :type comm: MPI.Comm + + :param logger: Logger + :type logger: Log + + :param auxiliary_dir: Path to the directory where the necessary auxiliary files will be created if them are not + created yet. + :type auxiliary_dir: str + + :param grid_shp: Shapefile with the grid horizontal distribution. + :type grid_shp: GeoDataFrame + + :param date_array: List of datetimes. + :type date_array: list(datetime.datetime, ...) + + :param source_pollutants: List of input pollutants to take into account. + :type source_pollutants: list + + :param vertical_levels: List of top level of each vertical layer. + :type vertical_levels: list + + :param monthly_profiles_path: Path to the CSV file that contains all the monthly profiles. 
The CSV file must + contain the following columns [P_month, January, February, March, April, May, June, July, August, September, + October, November, December] + :type monthly_profiles_path: str + + :param weekly_profiles_path: Path to the CSV file that contains all the weekly profiles. The CSV file must + contain the following columns [P_week, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday] + :type weekly_profiles_path: str + + :param hourly_profiles_path: Path to the CSV file that contains all the hourly profiles. The CSV file must + contain the following columns [P_hour, 0, 1, 2, 3, ..., 22, 23] + :type hourly_profiles_path: str + + :param speciation_map_path: Path to the CSV file that contains the speciation map. The CSV file must contain + the following columns [dst, src, description] + The 'dst' column will be used as output pollutant list and the 'src' column as their onw input pollutant + to be used as a fraction in the speciation profiles. + :type speciation_map_path: str + + :param speciation_profiles_path: Path to the file that contains all the speciation profiles. The CSV file + must contain the "Code" column with the value of each animal of the animal_list. The rest of columns + have to be the sames as the column 'dst' of the 'speciation_map_path' file. + :type speciation_profiles_path: str + + :param molecular_weights_path: Path to the CSV file that contains all the molecular weights needed. The CSV + file must contain the 'Specie' and 'MW' columns. + :type molecular_weights_path: str + + """ + spent_time = timeit.default_timer() + self.comm = comm + self.logger = logger + self.auxiliary_dir = auxiliary_dir + self.grid_shp = grid_shp + self.clip = clip + self.date_array = date_array + self.source_pollutants = source_pollutants + + self.vertical_levels = vertical_levels + + # Reading temporal profiles + self.monthly_profiles = self.read_monthly_profiles(monthly_profiles_path) + self.weekly_profiles = self.read_weekly_profiles(weekly_profiles_path) + self.hourly_profiles = self.read_hourly_profiles(hourly_profiles_path) + + # Reading speciation files + self.speciation_map = self.read_speciation_map(speciation_map_path) + self.speciation_profile = self.read_speciation_profiles(speciation_profiles_path) + self.molecular_weights = self.read_molecular_weights(molecular_weights_path) + + self.output_pollutants = self.speciation_map.keys() + + self.logger.write_time_log('Sector', '__init__', timeit.default_timer() - spent_time) + + def read_speciation_profiles(self, path): + """ + Read all the speciation profiles. + + The CSV must contain the column ID with the identification of that profile. The rest of columns are the output + pollutant and the value is the fraction of input pollutant that goes to that output pollutant. + + e.g.: + ID,NOx,SOx,CO,NMVOC,PM10,PM25,PMC,CO2 + default,1,1,1,1,1,1,1,1 + + :param path: Path to the CSV that contains the speciation profiles. + :type path: str + + :return: Dataframe with the speciation profile and the ID as index. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + if path is None: + dataframe = None + else: + dataframe = pd.read_csv(path) + dataframe.set_index('ID', inplace=True) + + self.logger.write_time_log('Sector', 'read_speciation_profiles', timeit.default_timer() - spent_time) + return dataframe + + def read_speciation_map(self, path): + """ + Read the speciation map. 
+ + The speciation map is the CSV file that contains the relation from the output pollutant and the correspondent + input pollutant associated. That file also contains a short description of the output pollutant and the units to + be stored. + + e.g.: + dst,src,description,units + NOx,nox_no2,desc_no,mol.s-1 + SOx,so2,desc_so2,mol.s-1 + CO,co,desc_co,mol.s-1 + CO2,co2,desc_co2,mol.s-1 + NMVOC,nmvoc,desc_nmvoc,g.s-1 + PM10,pm10,desc_pm10,g.s-1 + PM25,pm25,desc_pm25,g.s-1 + PMC,,desc_pmc,g.s-1 + + :param path: Path to the speciation map file. + :type path: str + + :return: Dictionary with the output pollutant as key and the input pollutant as value. + :rtype: dict + """ + spent_time = timeit.default_timer() + dataframe = pd.read_csv(path) + if 'PMC' in dataframe['dst'].values and all(element in self.source_pollutants for element in ['pm10', 'pm25']): + dataframe_aux = dataframe.loc[dataframe['src'].isin(self.source_pollutants), :] + dataframe = pd.concat([dataframe_aux, dataframe.loc[dataframe['dst'] == 'PMC', :]]) + else: + dataframe = dataframe.loc[dataframe['src'].isin(self.source_pollutants), :] + + dataframe = dict(zip(dataframe['dst'], dataframe['src'])) + self.logger.write_time_log('Sector', 'read_speciation_map', timeit.default_timer() - spent_time) + + return dataframe + + def read_molecular_weights(self, path): + """ + Read the CSV file that contains the molecular weights + + e.g.: + Specie,MW + nox_no,30.01 + nox_no2,46.01 + co,28.01 + co2,44.01 + so2,64.06 + nh3,17.03 + + :param path: Path to the CSV file. + :type path: str + + :return: Dictionary with the specie as key and the molecular weight as value. + :rtype: dict + """ + spent_time = timeit.default_timer() + dataframe = pd.read_csv(path) + # dataframe = dataframe.loc[dataframe['Specie'].isin(self.source_pollutants)] + + mol_wei = dict(zip(dataframe['Specie'], dataframe['MW'])) + self.logger.write_time_log('Sector', 'read_molecular_weights', timeit.default_timer() - spent_time) + + return mol_wei + + def read_profiles(self, path, sep=','): + """ + Read the CSV profile. + + :param path: Path to the CSV file that contains the profiles + :type path: str + + :param sep: Separator of the values. [default -> ','] + :type sep: str + + :return: DataFrame with the profiles. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + dataframe = pd.read_csv(path, sep=sep) + self.logger.write_time_log('Sector', 'read_profiles', timeit.default_timer() - spent_time) + + return dataframe + + def read_monthly_profiles(self, path): + """ + Read the DataFrame of the monthly profiles with the month number as columns. + + :param path: Path to the file that contains the monthly profiles. + :type path: str + + :return: DataFrame of the monthly profiles. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + if path is None: + profiles = None + else: + profiles = pd.read_csv(path) + + profiles.rename( + columns={'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6, 'July': 7, + 'August': 8, 'September': 9, 'October': 10, 'November': 11, 'December': 12}, + inplace=True) + profiles.set_index('P_month', inplace=True) + + self.logger.write_time_log('Sector', 'read_monthly_profiles', timeit.default_timer() - spent_time) + return profiles + + def read_weekly_profiles(self, path): + """ + Read the Dataset of the weekly profiles with the weekdays as numbers (Monday: 0 - Sunday:6) as columns. + + + :param path: Path to the file that contains the weekly profiles. 
+ :type path: str + + :return: Dataset od the weekly profiles. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + if path is None: + profiles = None + else: + profiles = pd.read_csv(path) + + profiles.rename( + columns={'Monday': 0, 'Tuesday': 1, 'Wednesday': 2, 'Thursday': 3, 'Friday': 4, 'Saturday': 5, + 'Sunday': 6, }, inplace=True) + profiles.set_index('P_week', inplace=True) + self.logger.write_time_log('Sector', 'read_weekly_profiles', timeit.default_timer() - spent_time) + return profiles + + def read_hourly_profiles(self, path): + """ + Read the Dataset of the hourly profiles with the hours (int) as columns. + + :param path: Path to the file that contains the monthly profiles. + :type path: str + + :return: Dataset od the monthly profiles. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + if path is None: + profiles = None + else: + profiles = pd.read_csv(path) + profiles.rename( + columns={'P_hour': -1, '00': 0, '01': 1, '02': 2, '03': 3, '04': 4, '05': 5, '06': 6, '07': 7, + '08': 8, '09': 9, '10': 10, '11': 11, '12': 12, '13': 13, '14': 14, '15': 15, '16': 16, + '17': 17, '18': 18, '19': 19, '20': 20, '21': 21, '22': 22, '23': 23}, inplace=True) + profiles.columns = profiles.columns.astype(int) + profiles.rename(columns={-1: 'P_hour'}, inplace=True) + profiles.set_index('P_hour', inplace=True) + self.logger.write_time_log('Sector', 'read_hourly_profiles', timeit.default_timer() - spent_time) + return profiles + + def calculate_rebalanced_weekly_profile(self, profile, date): + """ + Correct the weekly profile depending on the date selected. + + If we sum the weekday factor of each day of the full month it mus sum 1. + + :param profile: Profile to be corrected. + {0: 1.0414, 1: 1.0310, 2: 1.0237, 3: 1.0268, 4: 1.0477, 5: 0.9235, 6: 0.9058} + :type profile: dict + + :param date: Date to select the month to evaluate. + :type date: datetime.datetime + + :return: Profile already rebalanced. + :rtype: dict + """ + spent_time = timeit.default_timer() + weekdays = self.calculate_weekdays(date) + + rebalanced_profile = self.calculate_weekday_factor_full_month(profile, weekdays) + self.logger.write_time_log('Sector', 'calculate_rebalanced_weekly_profile', timeit.default_timer() - spent_time) + + return rebalanced_profile + + def calculate_weekday_factor_full_month(self, profile, weekdays): + """ + Operate with all the days of the month to get the sum of daily factors of the full month. + + :param profile: input profile + :type profile: dict + + :param weekdays: Dictionary with the number of days of each day type (Monday, Tuesday, ...) + :type weekdays: dict + + :return: Dictionary with the corrected profile. + :rtype: dict + """ + spent_time = timeit.default_timer() + weekdays_factors = 0 + num_days = 0 + for day in xrange(7): + weekdays_factors += profile[day] * weekdays[day] + num_days += weekdays[day] + increment = float(num_days - weekdays_factors) / num_days + for day in xrange(7): + profile[day] = (increment + profile[day]) / num_days + self.logger.write_time_log('Sector', 'calculate_weekday_factor_full_month', timeit.default_timer() - spent_time) + + return profile + + def calculate_weekdays(self, date): + """ + Calculate the number of days of each day type for the given month of the year. + + :param date: Date to select the month to evaluate. + :type date: datetime.datetime + + :return: Dictionary with the number of days of each day type (Monday, Tuesday, ...) 
+ :rtype: dict + """ + from calendar import monthrange, weekday, MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY + spent_time = timeit.default_timer() + weekdays = [MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY] + days = [weekday(date.year, date.month, d + 1) for d in xrange(monthrange(date.year, date.month)[1])] + + weekdays_dict = {} + for i, day in enumerate(weekdays): + weekdays_dict[i] = days.count(day) + self.logger.write_time_log('Sector', 'calculate_weekdays', timeit.default_timer() - spent_time) + return weekdays_dict + + def add_dates(self, dataframe, drop_utc=True): + """ + Add the 'date' and 'tstep' column to the dataframe. + + The dataframe will be replicated as many times as time steps to calculate. + + :param dataframe: Geodataframe to be extended with the dates. + :type dataframe: GeoDataFrame + + :return: Geodataframe with the dates. The length of the new dataframe is the length of the input dataframe + multiplied by the number of time steps. + :rtype: GeoDataFrame + """ + spent_time = timeit.default_timer() + dataframe = self.add_timezone(dataframe) + df_list = [] + + for tstep, date in enumerate(self.date_array): + df_aux = dataframe.copy() + df_aux['date'] = pd.to_datetime(date, utc=True) + df_aux['date_utc'] = pd.to_datetime(date, utc=True) + df_aux['tstep'] = tstep + # df_aux = self.to_timezone(df_aux) + df_list.append(df_aux) + dataframe = pd.concat(df_list, ignore_index=True) + dataframe = self.to_timezone(dataframe) + if drop_utc: + dataframe.drop('date_utc', axis=1, inplace=True) + self.logger.write_time_log('Sector', 'add_dates', timeit.default_timer() - spent_time) + + return dataframe + + def add_timezone(self, dataframe): + """ + Add the timezone os the centroid of each geometry of the input geodataframe. + + :param dataframe: Geodataframe where add the timezone. + :type dataframe: geopandas.GeoDataframe + + :return: Geodataframe with the timezone column. + :rtype: geopandas.GeoDataframe """ + from timezonefinder import TimezoneFinder + spent_time = timeit.default_timer() + dataframe = dataframe.to_crs({'init': 'epsg:4326'}) + tzfinder = TimezoneFinder() + dataframe['timezone'] = dataframe.centroid.apply(lambda x: tzfinder.timezone_at(lng=x.x, lat=x.y)) + dataframe.reset_index(inplace=True) + self.logger.write_time_log('Sector', 'add_timezone', timeit.default_timer() - spent_time) + return dataframe + + def to_timezone(self, dataframe): + """ + Set the local date with the correspondent timezone substituting the UTC date. + + :param dataframe: DataFrame with the UTC date column. + :type dataframe: DataFrame + + :return: Catalog with the local date column. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + dataframe['date'] = dataframe.groupby('timezone')['date'].apply( + lambda x: x.dt.tz_convert(x.name).dt.tz_localize(None)) + + dataframe.drop('timezone', axis=1, inplace=True) + self.logger.write_time_log('Sector', 'to_timezone', timeit.default_timer() - spent_time) + + return dataframe + + def add_nut_code(self, shapefile, nut_shapefile_path, nut_value='ORDER06'): + """ + Add 'nut_code' column into the shapefile based on the 'nut_value' column of the 'nut_shapefile_path' shapefile. + + The elements that are not into any NUT will be dropped. + If an element belongs to two NUTs will be set the fist one that appear in the 'nut_shapefile_path' shapefile. + + :param shapefile: Shapefile where add the NUT code. 
+ :type shapefile: geopandas.GeoDataframe + + :param nut_shapefile_path: Path to the shapefile with the polygons that contains the NUT code into the + 'nut_value' column. + :type nut_shapefile_path: str + + :param nut_value: Column name of the NUT codes. + :type nut_value: str + + :return: Shapefile with the 'nut_code' column set. + :rtype: geopandas.GeoDataframe + """ + spent_time = timeit.default_timer() + nut_shapefile = gpd.read_file(nut_shapefile_path).to_crs(shapefile.crs) + shapefile = gpd.sjoin(shapefile, nut_shapefile.loc[:, [nut_value, 'geometry']], how='left', op='intersects') + + shapefile = shapefile[~shapefile.index.duplicated(keep='first')] + shapefile.drop('index_right', axis=1, inplace=True) + + shapefile.rename(columns={nut_value: 'nut_code'}, inplace=True) + shapefile.loc[shapefile['nut_code'].isna(), 'nut_code'] = -999 + shapefile['nut_code'] = shapefile['nut_code'].astype(np.int16) + self.logger.write_time_log('Sector', 'add_nut_code', timeit.default_timer() - spent_time) + + return shapefile + + def spatial_overlays(self, df1, df2, how='intersection'): + """ + Compute overlay intersection of two GeoPandasDataFrames df1 and df2 + + https://github.com/geopandas/geopandas/issues/400 + + :param df1: GeoDataFrame + :param df2: GeoDataFrame + :param how: Operation to do + :return: GeoDataFrame + """ + spent_time = timeit.default_timer() + df1 = df1.copy() + df2 = df2.copy() + df1['geometry'] = df1.geometry.buffer(0) + df2['geometry'] = df2.geometry.buffer(0) + if how == 'intersection': + # Spatial Index to create intersections + spatial_index = df2.sindex + df1['bbox'] = df1.geometry.apply(lambda x: x.bounds) + df1['histreg'] = df1.bbox.apply(lambda x: list(spatial_index.intersection(x))) + pairs = df1['histreg'].to_dict() + nei = [] + for i, j in pairs.items(): + for k in j: + nei.append([i, k]) + + pairs = gpd.GeoDataFrame(nei, columns=['idx1', 'idx2'], crs=df1.crs) + pairs = pairs.merge(df1, left_on='idx1', right_index=True) + pairs = pairs.merge(df2, left_on='idx2', right_index=True, suffixes=['_1', '_2']) + pairs['Intersection'] = pairs.apply(lambda x: (x['geometry_1'].intersection(x['geometry_2'])).buffer(0), + axis=1) + pairs = gpd.GeoDataFrame(pairs, columns=pairs.columns, crs=df1.crs) + cols = pairs.columns.tolist() + cols.remove('geometry_1') + cols.remove('geometry_2') + cols.remove('histreg') + cols.remove('bbox') + cols.remove('Intersection') + dfinter = pairs[cols + ['Intersection']].copy() + dfinter.rename(columns={'Intersection': 'geometry'}, inplace=True) + dfinter = gpd.GeoDataFrame(dfinter, columns=dfinter.columns, crs=pairs.crs) + dfinter = dfinter.loc[~dfinter.geometry.is_empty] + return_value = dfinter + elif how == 'difference': + spatial_index = df2.sindex + df1['bbox'] = df1.geometry.apply(lambda x: x.bounds) + df1['histreg'] = df1.bbox.apply(lambda x: list(spatial_index.intersection(x))) + df1['new_g'] = df1.apply(lambda x: reduce(lambda x, y: x.difference(y).buffer(0), + [x.geometry] + list(df2.iloc[x.histreg].geometry)), axis=1) + df1.geometry = df1.new_g + df1 = df1.loc[~df1.geometry.is_empty].copy() + df1.drop(['bbox', 'histreg', 'new_g'], axis=1, inplace=True) + return_value = df1 + self.logger.write_time_log('Sector', 'spatial_overlays', timeit.default_timer() - spent_time) + + return return_value + + def nearest(self, row, geom_union, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None): + """Finds the nearest point and return the corresponding value from specified column. 
+ https://automating-gis-processes.github.io/2017/lessons/L3/nearest-neighbour.html#nearest-points-using-geopandas + """ + from shapely.ops import nearest_points + spent_time = timeit.default_timer() + # Find the geometry that is closest + nearest = df2[geom2_col] == nearest_points(row[geom1_col], geom_union)[1] + # Get the corresponding value from df2 (matching is based on the geometry) + value = df2[nearest][src_column].get_values()[0] + self.logger.write_time_log('Sector', 'nearest', timeit.default_timer() - spent_time) + + return value + + def speciate(self, dataframe, code='default'): + spent_time = timeit.default_timer() + self.logger.write_log('\t\tSpeciating {0} emissions'.format(code), message_level=2) + + new_dataframe = pd.DataFrame(index=dataframe.index, data=None) + for out_pollutant in self.output_pollutants: + if out_pollutant != 'PMC': + self.logger.write_log("\t\t\t{0} = ({1}/{2})*{3}".format( + out_pollutant, self.speciation_map[out_pollutant], + self.molecular_weights[self.speciation_map[out_pollutant]], + self.speciation_profile.loc[code, out_pollutant]), message_level=3) + if self.speciation_map[out_pollutant] in dataframe.columns.values: + new_dataframe[out_pollutant] = (dataframe[self.speciation_map[out_pollutant]] / + self.molecular_weights[self.speciation_map[out_pollutant]]) * \ + self.speciation_profile.loc[code, out_pollutant] + else: + self.logger.write_log("\t\t\t{0} = ({1}/{2} - {4}/{5})*{3}".format( + out_pollutant, 'pm10', self.molecular_weights['pm10'], + self.speciation_profile.loc[code, out_pollutant], 'pm25', self.molecular_weights['pm25']), + message_level=3) + + new_dataframe[out_pollutant] = \ + ((dataframe['pm10'] / self.molecular_weights['pm10']) - + (dataframe['pm25'] / self.molecular_weights['pm25'])) * \ + self.speciation_profile.loc[code, out_pollutant] + self.logger.write_time_log('Sector', 'speciate', timeit.default_timer() - spent_time) + return new_dataframe + + def get_output_pollutants(self, input_pollutant): + spent_time = timeit.default_timer() + return_value = [outs for outs, ints in self.speciation_map.iteritems() if ints == input_pollutant] + self.logger.write_time_log('Sector', 'get_output_pollutants', timeit.default_timer() - spent_time) + return return_value diff --git a/hermesv3_bu/sectors/sector_manager.py b/hermesv3_bu/sectors/sector_manager.py new file mode 100755 index 0000000000000000000000000000000000000000..f36f32c1ed19a93eae922540e67489579c60166d --- /dev/null +++ b/hermesv3_bu/sectors/sector_manager.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python + +import timeit +from hermesv3_bu.logger.log import Log + +SECTOR_LIST = ['traffic', 'traffic_area', 'aviation', 'point_sources', 'recreational_boats', 'shipping_port', + 'residential', 'livestock', 'crop_operations', 'crop_fertilizers', 'agricultural_machinery'] + + +class SectorManager(object): + def __init__(self, comm_world, logger, grid, clip, date_array, arguments): + """ + + :param comm_world: MPI Communicator + + :param logger: Logger + :type logger: Log + + :param grid: + :param clip: + :param date_array: + :type date_array: list + + :param arguments: + :type arguments: NameSpace + """ + spent_time = timeit.default_timer() + self.logger = logger + self.sector_list = self.make_sector_list(arguments, comm_world.Get_size()) + self.logger.write_log('Sector process distribution:') + for sect, procs in self.sector_list.iteritems(): + self.logger.write_log('\t{0}: {1}'.format(sect, procs)) + + color = 10 + agg_color = 99 + for sector, sector_procs in self.sector_list.iteritems(): 
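+            # Each rank constructs only the sector whose process list contains it.
+            # comm_world.Split(color, key) groups every rank that passes the same color
+            # into a new communicator, ordered by key (here the rank's position inside
+            # sector_procs). The crop_operations, crop_fertilizers and
+            # agricultural_machinery branches below first build a shared communicator
+            # (comm_agr, via agg_color) and then split it again per sector.
+            # Minimal illustrative sketch of the Split semantics (hypothetical rank
+            # distribution, not part of this module):
+            #
+            #     from mpi4py import MPI
+            #     world = MPI.COMM_WORLD
+            #     color = 10 if world.Get_rank() < 2 else 11   # e.g. ranks 0-1 vs the rest
+            #     sub = world.Split(color, key=world.Get_rank())
+            #     # 'sub' now only contains the ranks that passed the same color value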
+ if sector == 'aviation' and comm_world.Get_rank() in sector_procs: + from hermesv3_bu.sectors.aviation_sector import AviationSector + self.sector = AviationSector( + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, + arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + arguments.aviation_source_pollutants, grid.vertical_desctiption, arguments.airport_list, + arguments.plane_list, arguments.airport_shapefile_path, arguments.airport_runways_shapefile_path, + arguments.airport_runways_corners_shapefile_path, arguments.airport_trajectories_shapefile_path, + arguments.airport_operations_path, arguments.planes_path, arguments.airport_times_path, + arguments.airport_ef_dir, arguments.aviation_weekly_profiles, arguments.aviation_hourly_profiles, + arguments.speciation_map, arguments.aviation_speciation_profiles, arguments.molecular_weights) + + elif sector == 'shipping_port' and comm_world.Get_rank() in sector_procs: + from hermesv3_bu.sectors.shipping_port_sector import ShippingPortSector + self.sector = ShippingPortSector( + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, + arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + arguments.shipping_port_source_pollutants, grid.vertical_desctiption, arguments.vessel_list, + arguments.port_list, arguments.hoteling_shapefile_path, arguments.maneuvering_shapefile_path, + arguments.shipping_port_ef_path, arguments.shipping_port_engine_percent_path, + arguments.shipping_port_tonnage_path, arguments.shipping_port_load_factor_path, + arguments.shipping_port_power_path, arguments.shipping_port_monthly_profiles, + arguments.shipping_port_weekly_profiles, arguments.shipping_port_hourly_profiles, + arguments.speciation_map, arguments.shipping_port_speciation_profiles, arguments.molecular_weights) + + elif sector == 'livestock' and comm_world.Get_rank() in sector_procs: + from hermesv3_bu.sectors.livestock_sector import LivestockSector + self.sector = LivestockSector( + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, + arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + arguments.livestock_source_pollutants, grid.vertical_desctiption, arguments.animal_list, + arguments.gridded_livestock, arguments.correction_split_factors, + arguments.temperature_daily_files_path, arguments.wind_speed_daily_files_path, + arguments.denominator_yearly_factor_dir, arguments.livestock_ef_files_dir, + arguments.livestock_monthly_profiles, arguments.livestock_weekly_profiles, + arguments.livestock_hourly_profiles, arguments.speciation_map, + arguments.livestock_speciation_profiles, arguments.molecular_weights, arguments.nut_shapefile_prov) + + elif sector == 'crop_operations' and comm_world.Get_rank() in sector_procs: + from hermesv3_bu.sectors.agricultural_crop_operations_sector import AgriculturalCropOperationsSector + agg_procs = AgriculturalCropOperationsSector.get_agricultural_processor_list(self.sector_list) + comm_agr = comm_world.Split(agg_color, agg_procs.index(comm_world.Get_rank())) + comm = comm_agr.Split(color, sector_procs.index(comm_world.Get_rank())) + self.sector = AgriculturalCropOperationsSector( + comm_agr, comm, logger, arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + arguments.crop_operations_source_pollutants, + grid.vertical_desctiption, arguments.crop_operations_list, arguments.nut_shapefile_ccaa, + arguments.land_uses_path, arguments.crop_operations_ef_files_dir, + 
arguments.crop_operations_monthly_profiles, arguments.crop_operations_weekly_profiles, + arguments.crop_operations_hourly_profiles, arguments.speciation_map, + arguments.crop_operations_speciation_profiles, arguments.molecular_weights, + arguments.land_use_by_nut_path, arguments.crop_by_nut_path, arguments.crop_from_landuse_path) + + elif sector == 'crop_fertilizers' and comm_world.Get_rank() in sector_procs: + from hermesv3_bu.sectors.agricultural_crop_fertilizers_sector import AgriculturalCropFertilizersSector + agg_procs = AgriculturalCropFertilizersSector.get_agricultural_processor_list(self.sector_list) + comm_agr = comm_world.Split(agg_color, agg_procs.index(comm_world.Get_rank())) + comm = comm_agr.Split(color, sector_procs.index(comm_world.Get_rank())) + self.sector = AgriculturalCropFertilizersSector( + comm_agr, comm, logger, arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + arguments.crop_fertilizers_source_pollutants, grid.vertical_desctiption, + arguments.crop_fertilizers_list, arguments.nut_shapefile_ccaa, arguments.land_uses_path, + arguments.crop_fertilizers_hourly_profiles, arguments.speciation_map, + arguments.crop_fertilizers_speciation_profiles, arguments.molecular_weights, + arguments.land_use_by_nut_path, arguments.crop_by_nut_path, arguments.crop_from_landuse_path, + arguments.cultivated_ratio, arguments.fertilizers_rate, arguments.crop_f_parameter, + arguments.crop_f_fertilizers, arguments.gridded_ph, arguments.gridded_cec, + arguments.fertilizers_denominator_yearly_factor_path, arguments.crop_calendar, + arguments.temperature_daily_files_path, arguments.wind_speed_daily_files_path, + arguments.crop_growing_degree_day_path) + + elif sector == 'agricultural_machinery' and comm_world.Get_rank() in sector_procs: + from hermesv3_bu.sectors.agricultural_machinery_sector import AgriculturalMachinerySector + agg_procs = AgriculturalMachinerySector.get_agricultural_processor_list(self.sector_list) + comm_agr = comm_world.Split(agg_color, agg_procs.index(comm_world.Get_rank())) + comm = comm_agr.Split(color, sector_procs.index(comm_world.Get_rank())) + self.sector = AgriculturalMachinerySector( + comm_agr, comm, logger, arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + arguments.crop_machinery_source_pollutants, grid.vertical_desctiption, + arguments.crop_machinery_list, arguments.nut_shapefile_ccaa, arguments.machinery_list, + arguments.land_uses_path, arguments.crop_machinery_ef_path, + arguments.crop_machinery_monthly_profiles, arguments.crop_machinery_weekly_profiles, + arguments.crop_machinery_hourly_profiles, + arguments.speciation_map, arguments.crop_machinery_speciation_profiles, arguments.molecular_weights, + arguments.land_use_by_nut_path, arguments.crop_by_nut_path, arguments.crop_from_landuse_path, + arguments.nut_shapefile_prov, arguments.crop_machinery_deterioration_factor_path, + arguments.crop_machinery_load_factor_path, arguments.crop_machinery_vehicle_ratio_path, + arguments.crop_machinery_vehicle_units_path, arguments.crop_machinery_vehicle_workhours_path, + arguments.crop_machinery_vehicle_power_path, arguments.crop_machinery_by_nut) + + elif sector == 'residential' and comm_world.Get_rank() in sector_procs: + from hermesv3_bu.sectors.residential_sector import ResidentialSector + self.sector = ResidentialSector( + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, + arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + arguments.residential_source_pollutants, 
grid.vertical_desctiption, arguments.fuel_list, + arguments.nut_shapefile_prov, arguments.nut_shapefile_ccaa, arguments.population_density_map, + arguments.population_type_map, arguments.population_type_by_ccaa, arguments.population_type_by_prov, + arguments.energy_consumption_by_prov, arguments.energy_consumption_by_ccaa, + arguments.residential_spatial_proxies, arguments.residential_ef_files_path, + arguments.residential_heating_degree_day_path, arguments.temperature_daily_files_path, + arguments.residential_hourly_profiles, arguments.speciation_map, + arguments.residential_speciation_profiles, arguments.molecular_weights) + + elif sector == 'recreational_boats' and comm_world.Get_rank() in sector_procs: + from hermesv3_bu.sectors.recreational_boats_sector import RecreationalBoatsSector + self.sector = RecreationalBoatsSector( + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, + arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + arguments.recreational_boats_source_pollutants, grid.vertical_desctiption, + arguments.recreational_boats_list, arguments.recreational_boats_density_map, + arguments.recreational_boats_by_type, arguments.recreational_boats_ef_path, + arguments.recreational_boats_monthly_profiles, arguments.recreational_boats_weekly_profiles, + arguments.recreational_boats_hourly_profiles, arguments.speciation_map, + arguments.recreational_boats_speciation_profiles, arguments.molecular_weights) + + elif sector == 'point_sources' and comm_world.Get_rank() in sector_procs: + from hermesv3_bu.sectors.point_source_sector import PointSourceSector + self.sector = PointSourceSector( + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, + arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + arguments.point_source_pollutants, grid.vertical_desctiption, + arguments.point_source_catalog, arguments.point_source_monthly_profiles, + arguments.point_source_weekly_profiles, arguments.point_source_hourly_profiles, + arguments.speciation_map, arguments.point_source_speciation_profiles, arguments.point_source_snaps, + arguments.point_source_measured_emissions, arguments.molecular_weights, + plume_rise=arguments.plume_rise, plume_rise_pahts={ + 'friction_velocity_dir': arguments.friction_velocity_dir, + 'pblh_dir': arguments.pblh_dir, + 'obukhov_length_dir': arguments.obukhov_length_dir, + 'layer_thickness_dir': arguments.layer_thickness_dir, + 'temperature_sfc_dir': arguments.temperature_sfc_dir, + 'temperature_4d_dir': arguments.temperature_4d_dir, + 'u10_wind_speed_dir': arguments.u10_wind_speed_dir, + 'v10_wind_speed_dir': arguments.v10_wind_speed_dir, + 'u_wind_speed_4d_dir': arguments.u_wind_speed_4d_dir, + 'v_wind_speed_4d_dir': arguments.v_wind_speed_4d_dir}) + elif sector == 'traffic' and comm_world.Get_rank() in sector_procs: + from hermesv3_bu.sectors.traffic_sector import TrafficSector + self.sector = TrafficSector( + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, + arguments.auxiliary_files_path, grid.shapefile, clip, date_array, arguments.traffic_pollutants, + grid.vertical_desctiption, arguments.road_link_path, arguments.fleet_compo_path, + arguments.traffic_speed_hourly_path, arguments.traffic_monthly_profiles, + arguments.traffic_weekly_profiles, arguments.traffic_hourly_profiles_mean, + arguments.traffic_hourly_profiles_weekday, arguments.traffic_hourly_profiles_saturday, + arguments.traffic_hourly_profiles_sunday, arguments.traffic_ef_path, 
arguments.vehicle_types, + arguments.load, arguments.speciation_map, arguments.traffic_speciation_profile_hot_cold, + arguments.traffic_speciation_profile_tyre, arguments.traffic_speciation_profile_road, + arguments.traffic_speciation_profile_brake, arguments.traffic_speciation_profile_resuspension, + arguments.temperature_hourly_files_path, arguments.output_dir, arguments.molecular_weights, + arguments.resuspension_correction, arguments.precipitation_files_path, arguments.do_hot, + arguments.do_cold, arguments.do_tyre_wear, arguments.do_brake_wear, arguments.do_road_wear, + arguments.do_resuspension, arguments.write_rline) + + elif sector == 'traffic_area' and comm_world.Get_rank() in sector_procs: + from hermesv3_bu.sectors.traffic_area_sector import TrafficAreaSector + self.sector = TrafficAreaSector( + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, + arguments.auxiliary_files_path, grid.shapefile, clip, date_array, arguments.traffic_area_pollutants, + grid.vertical_desctiption, arguments.population_density_map, arguments.speciation_map, + arguments.molecular_weights, arguments.do_evaporative, arguments.traffic_area_gas_path, + arguments.popullation_by_municipality, arguments.nut_shapefile_prov, + arguments.traffic_area_speciation_profiles_evaporative, arguments.traffic_area_evaporative_ef_file, + arguments.temperature_hourly_files_path, arguments.do_small_cities, + arguments.traffic_area_small_cities_path, arguments.traffic_area_speciation_profiles_small_cities, + arguments.traffic_area_small_cities_ef_file, arguments.small_cities_monthly_profile, + arguments.small_cities_weekly_profile, arguments.small_cities_hourly_profile + ) + + color += 1 + + self.logger.write_time_log('SectorManager', '__init__', timeit.default_timer() - spent_time) + + def run(self): + spent_time = timeit.default_timer() + emis = self.sector.calculate_emissions() + self.logger.write_time_log('SectorManager', 'run', timeit.default_timer() - spent_time) + return emis + + def make_sector_list(self, arguments, max_procs): + spent_time = timeit.default_timer() + sector_dict = {} + accum = 0 + for sector in SECTOR_LIST: + if vars(arguments)['do_{0}'.format(sector)]: + n_procs = vars(arguments)['{0}_processors'.format(sector)] + sector_dict[sector] = [accum + x for x in xrange(n_procs)] + accum += n_procs + if accum != max_procs: + raise ValueError("The selected number of processors '{0}' does not fit ".format(max_procs) + + "with the sum of processors dedicated for all the sectors " + + "'{0}': {1}".format(accum, {sector: len(sector_procs) + for sector, sector_procs in sector_dict.iteritems()})) + + self.logger.write_time_log('SectorManager', 'make_sector_list', timeit.default_timer() - spent_time) + return sector_dict diff --git a/hermesv3_bu/sectors/shipping_port_sector.py b/hermesv3_bu/sectors/shipping_port_sector.py new file mode 100755 index 0000000000000000000000000000000000000000..bb6823bd1373cee9970276815873482073f4e4b3 --- /dev/null +++ b/hermesv3_bu/sectors/shipping_port_sector.py @@ -0,0 +1,649 @@ +#!/usr/bin/env python + +from hermesv3_bu.sectors.sector import Sector +import pandas as pd +import geopandas as gpd +import numpy as np +import timeit +from hermesv3_bu.logger.log import Log +from hermesv3_bu.io_server.io_shapefile import IoShapefile + + +class ShippingPortSector(Sector): + def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + vessel_list, port_list, hoteling_shapefile_path, maneuvering_shapefile_path, 
ef_dir, + engine_percent_path, tonnage_path, load_factor_path, power_path, monthly_profiles_path, + weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, + molecular_weights_path): + """ + Initialise the Shipping port sectopr class + + :param comm: Communicator for the sector calculation. + :type comm: MPI.COMM + + :param logger: Logger + :type logger: Log + + :param auxiliary_dir: Path to the directory where the necessary auxiliary files will be created if them are not + created yet. + :type auxiliary_dir: str + + :param grid_shp: Shapefile with the grid horizontal distribution. + :type grid_shp: GeoDataFrame + + :param date_array: List of datetimes. + :type date_array: list(datetime.datetime, ...) + + :param source_pollutants: List of input pollutants to take into account. + :type source_pollutants: list + + :param vertical_levels: List of top level of each vertical layer. + :type vertical_levels: list + + :param vessel_list: List of vessels to take into account. + :type vessel_list: list + + :param port_list: List of ports to take into account. + :type port_list: list + + :param ef_dir: Path to the CSV that contains all the emission factors. + Units: g/kWh + :type ef_dir: str + + :param monthly_profiles_path: Path to the CSV file that contains all the monthly profiles. The CSV file must + contain the following columns [P_month, January, February, March, April, May, June, July, August, September, + October, November, December] + :type monthly_profiles_path: str + + :param weekly_profiles_path: Path to the CSV file that contains all the weekly profiles. The CSV file must + contain the following columns [P_week, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday] + The P_week code have to be the input pollutant. + :type weekly_profiles_path: str + + :param hourly_profiles_path: Path to the CSV file that contains all the hourly profiles. The CSV file must + contain the following columns [P_hour, 0, 1, 2, 3, ..., 22, 23] + :type hourly_profiles_path: str + + :param speciation_map_path: Path to the CSV file that contains the speciation map. The CSV file must contain + the following columns [dst, src, description] + The 'dst' column will be used as output pollutant list and the 'src' column as their onw input pollutant + to be used as a fraction in the speciation profiles. + :type speciation_map_path: str + + :param speciation_profiles_path: Path to the file that contains all the speciation profiles. The CSV file + must contain the "Code" column with the value of each animal of the animal_list. The rest of columns + have to be the sames as the column 'dst' of the 'speciation_map_path' file. + :type speciation_profiles_path: str + + :param molecular_weights_path: Path to the CSV file that contains all the molecular weights needed. The CSV + file must contain the 'Specie' and 'MW' columns. 
+ :type molecular_weights_path: str + """ + spent_time = timeit.default_timer() + logger.write_log('===== SHIPPING PORT SECTOR =====') + + super(ShippingPortSector, self).__init__( + comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path) + + self.ef_engine = self.read_profiles(ef_dir) + + self.vessel_list = vessel_list + + self.hoteling_shapefile_path = hoteling_shapefile_path + self.maneuvering_shapefile_path = maneuvering_shapefile_path + + self.port_list = self.get_port_list() + + self.engine_percent = self.read_profiles(engine_percent_path) + self.tonnage = self.read_profiles(tonnage_path) + self.tonnage.set_index('code', inplace=True) + self.load_factor = self.read_profiles(load_factor_path) + self.power_values = self.read_profiles(power_path) + self.logger.write_time_log('ShippingPortSector', '__init__', timeit.default_timer() - spent_time) + + def get_port_list(self): + if self.comm.Get_rank() == 0: + port_shp = IoShapefile(self.comm).read_shapefile_serial(self.maneuvering_shapefile_path) + port_shp.drop(columns=['Name', 'Weight'], inplace=True) + + port_shp = gpd.sjoin(port_shp, self.clip.shapefile.to_crs(port_shp.crs), how='inner', op='intersects') + port_list = np.unique(port_shp['code'].values) + print port_list + if len(port_list) < self.comm.Get_size(): + raise ValueError("The chosen number of processors {0} exceeds the number of involved ports {1}.".format( + self.comm.Get_size(), len(port_list)) + " Set {0} at shipping_port_processors value.".format( + len(port_list))) + port_list = np.array_split(port_list, self.comm.Get_size()) + else: + port_list = None + + port_list = self.comm.scatter(port_list, root=0) + + if len(port_list) == 0: + raise ValueError("The number ") + return list(port_list) + + def read_monthly_profiles(self, path): + """ + Read the DataFrame of the monthly profiles with the month number as columns. + + Overwrites the method of the super class. + + :param path: Path to the file that contains the monthly profiles. + :type path: str + + :return: DataFrame of the monthly profiles. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + if path is None: + profiles = None + else: + profiles = pd.read_csv(path) + + profiles.rename( + columns={'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6, 'July': 7, + 'August': 8, 'September': 9, 'October': 10, 'November': 11, 'December': 12}, + inplace=True) + self.logger.write_time_log('ShippingPortSector', 'read_monthly_profiles', timeit.default_timer() - spent_time) + + return profiles + + def add_timezone(self, dataframe, shapefile_path): + """ + Add the timezone os the centroid of each geometry of the input shapefile. + + Overwrites the method of the super class. + + :param dataframe: DataFrame where add the timezone. + :type dataframe: DataFrame + + :param shapefile_path: Path to the shapefile that contains the port geometries. + :type shapefile_path: str + + :return: DataFrame with the timezone. 
+ :rtype: DataFrame + """ + from timezonefinder import TimezoneFinder + spent_time = timeit.default_timer() + shapefile = gpd.read_file(shapefile_path) + shapefile = shapefile.loc[:, ['code', 'geometry']] + shapefile.drop_duplicates('code', keep='first', inplace=True) + + shapefile = shapefile.to_crs({'init': 'epsg:4326'}) + tzfinder = TimezoneFinder() + shapefile['timezone'] = shapefile.centroid.apply(lambda x: tzfinder.timezone_at(lng=x.x, lat=x.y)) + dataframe.reset_index(inplace=True) + dataframe = pd.merge(dataframe, shapefile.loc[:, ['code', 'timezone']], on='code') + dataframe.set_index(['code', 'vessel'], inplace=True) + self.logger.write_time_log('ShippingPortSector', 'add_timezone', timeit.default_timer() - spent_time) + + return dataframe + + def add_dates(self, dataframe): + """ + Add the 'date' and 'tstep' column to the dataframe. + + Overwrites the method of the super class. + + The dataframe will be replicated as many times as time steps to calculate. + + :param dataframe: Geodataframe to be extended with the dates. + :type dataframe: GeoDataFrame + + :return: DataFrame with the dates. The length of the new dataframe is the length of the input dataframe + multiplied by the number of time steps. + :rtype: DataFrame + + """ + spent_time = timeit.default_timer() + dataframe.reset_index(inplace=True) + df_list = [] + for tstep, date in enumerate(self.date_array): + df_aux = dataframe.copy() + df_aux['date'] = pd.to_datetime(date, utc=True) + df_aux['date_utc'] = pd.to_datetime(date, utc=True) + df_aux['tstep'] = tstep + # df_aux = self.to_timezone(df_aux) + df_list.append(df_aux) + dataframe = pd.concat(df_list, ignore_index=True) + + dataframe = self.to_timezone(dataframe) + dataframe.drop('date_utc', axis=1, inplace=True) + dataframe.set_index(['code', 'vessel', 'tstep'], inplace=True) + # del dataframe['date_utc'] + self.logger.write_time_log('ShippingPortSector', 'add_dates', timeit.default_timer() - spent_time) + + return dataframe + + def get_constants(self): + """ + Create a dataframe with all the needed constants for each port & vessel. + + - N: + - P: + - Rae: + - LF_mm: + - LF_hm: + - LF_ma: + - LF_ha: + - T_m: + - T_h: + - EF_: Emission factor for each pollutant. Units: + :return: + """ + # TODO Add constants description and Units + spent_time = timeit.default_timer() + + def get_n(df): + """ + Get the N value (XXXXXX) depending on the vessel type. + + :param df: Dataframe where find their N value. df.name is the vessel type. + :type df: pandas.Dataframe + + :return: DataFrame with whe N column. + :rtype: DataFrame + """ + aux = self.tonnage.loc[:, ['N_{0}'.format(df.name)]].reset_index() + aux['vessel'] = df.name + aux.set_index(['code', 'vessel'], inplace=True) + df['N'] = aux['N_{0}'.format(df.name)] + return df.loc[:, ['N']] + + def get_p(df): + """ + Get the P value (XXXXXX) depending on the vessel type. + + :param df: Dataframe where find their P value. df.name is the vessel type. + :type df: pandas.Dataframe + + :return: DataFrame with whe P column. 
+ :rtype: DataFrame + """ + aux = self.tonnage.loc[:, ['GT_{0}'.format(df.name)]].reset_index() + aux.rename(columns={'GT_{0}'.format(df.name): 'GT'}, inplace=True) + aux['vessel'] = df.name + aux.set_index(['code', 'vessel'], inplace=True) + aux['P'] = np.power(aux['GT'], self.power_values.loc[self.power_values['Type_vessel'] == df.name, + 'GT_exp'].values[0]) + df['P'] = aux['P'].multiply(self.power_values.loc[self.power_values['Type_vessel'] == df.name, + 'Value'].values[0]) + return df.loc[:, ['P']] + + def get_rae(df): + """ + Get the Rae value (XXXXXX) depending on the vessel type. + + :param df: Dataframe where find their Rae value. df.name is the vessel type. + :type df: pandas.Dataframe + + :return: DataFrame with whe Rae column. + :rtype: DataFrame + """ + df['Rae'] = self.power_values.loc[self.power_values['Type_vessel'] == df.name, 'Ratio_AE'].values[0] + return df.loc[:, ['Rae']] + + def get_t(df, phase): + """ + Get the T value (XXXXXX) for the selected phase depending on the vessel type. + + :param df: Dataframe where find their N value. df.name is the vessel type. + :type df: pandas.Dataframe + + :param phase: Phase type to select the T_ value. 'manoeuvring' or 'hoteling'. + :type phase: str + + :return: DataFrame with whe T_ column. + :rtype: DataFrame + """ + df['T'] = self.load_factor.loc[(self.load_factor['Type_vessel'] == df.name) & + (self.load_factor['Phase'] == phase), 'time'].values[0] + return df.loc[:, ['T']] + + def get_lf(df, phase, engine): + """ + Get the LF value (XXXXXX) for the selected phase and engine depending on the vessel type. + + :param df: Dataframe where find their N value. df.name is the vessel type. + :type df: pandas.Dataframe + + :param phase: Phase type to select the T_ value. 'manoeuvring' or 'hoteling'. + :type phase: str + + :param engine: Engine type to select the T_ value. 'main' or 'aux'. + :type engine: str + + :return: DataFrame with whe T_ column. + :rtype: DataFrame + """ + if engine == 'main': + col_name = 'LF_ME' + else: + col_name = 'LF_AE' + df['LF'] = self.load_factor.loc[(self.load_factor['Type_vessel'] == df.name) & + (self.load_factor['Phase'] == phase), col_name].values[0] + return df.loc[:, ['LF']] + + def get_ef(df, engine, poll): + """ + Get the EF value (Emission Factor) for the selected pollutant and engine depending on the vessel type. + + :param df: Dataframe where find their N value. df.name is the vessel type. + :type df: pandas.Dataframe + + :param poll: Pollutant to select the emission factor value. + :type poll: str + + :param engine: Engine type to select the T_ value. 'main' or 'aux'. + :type engine: str + + :return: DataFrame with whe T_ column. 
+ :rtype: DataFrame + """ + + if engine == 'main': + engine = 'ME' + else: + engine = 'AE' + aux1 = self.engine_percent.loc[(self.engine_percent['Type_vessel'] == df.name) & + (self.engine_percent['Engine'] == engine), ['Engine_fuel', 'Factor']] + aux2 = self.ef_engine.loc[(self.ef_engine['Engine'] == engine) & + (self.ef_engine['Engine_fuel'].isin(aux1['Engine_fuel'].values)), + ['Engine_fuel', 'EF_{0}'.format(poll)]] + + aux = pd.merge(aux1, aux2, on='Engine_fuel') + aux['value'] = aux['Factor'] * aux['EF_{0}'.format(poll)] + df['EF'] = aux['value'].sum() + return df.loc[:, ['EF']] + + dataframe = pd.DataFrame(index=pd.MultiIndex.from_product([self.port_list, self.vessel_list], + names=['code', 'vessel'])) + dataframe['N'] = dataframe.groupby('vessel').apply(get_n) + dataframe['P'] = dataframe.groupby('vessel').apply(get_p) + dataframe['Rae'] = dataframe.groupby('vessel').apply(get_rae) + dataframe['LF_mm'] = dataframe.groupby('vessel').apply(lambda x: get_lf(x, 'manoeuvring', 'main')) + dataframe['LF_hm'] = dataframe.groupby('vessel').apply(lambda x: get_lf(x, 'hoteling', 'main')) + dataframe['LF_ma'] = dataframe.groupby('vessel').apply(lambda x: get_lf(x, 'manoeuvring', 'aux')) + dataframe['LF_ha'] = dataframe.groupby('vessel').apply(lambda x: get_lf(x, 'hoteling', 'aux')) + dataframe['T_m'] = dataframe.groupby('vessel').apply(lambda x: get_t(x, 'manoeuvring')) + dataframe['T_h'] = dataframe.groupby('vessel').apply(lambda x: get_t(x, 'hoteling')) + for pollutant in self.source_pollutants: + dataframe['EF_m_{0}'.format(pollutant)] = dataframe.groupby('vessel').apply( + lambda x: get_ef(x, 'main', pollutant)) + dataframe['EF_a_{0}'.format(pollutant)] = dataframe.groupby('vessel').apply( + lambda x: get_ef(x, 'aux', pollutant)) + self.logger.write_time_log('ShippingPortSector', 'get_constants', timeit.default_timer() - spent_time) + + return dataframe + + def calculate_yearly_emissions_by_port_vessel(self): + """ + Calculate the yearly emissions by port and vessel for manoeuvring and hoteling phases. + + :return: Manoeuvring and hoteling yearly emissions by port and vessel. + :rtype: tuple + """ + spent_time = timeit.default_timer() + constants = self.get_constants() + manoeuvring = pd.DataFrame(index=constants.index) + hoteling = pd.DataFrame(index=constants.index) + for pollutant in self.source_pollutants: + manoeuvring['{0}'.format(pollutant)] = \ + constants['P'] * constants['N'] * constants['LF_mm'] * constants['T_m'] * \ + constants['EF_m_{0}'.format(pollutant)] + hoteling['{0}'.format(pollutant)] = \ + constants['P'] * constants['N'] * constants['LF_hm'] * constants['T_h'] * \ + constants['EF_m_{0}'.format(pollutant)] + manoeuvring['{0}'.format(pollutant)] += \ + constants['P'] * constants['Rae'] * constants['N'] * constants['LF_ma'] * constants['T_m'] * \ + constants['EF_a_{0}'.format(pollutant)] + hoteling['{0}'.format(pollutant)] += \ + constants['P'] * constants['Rae'] * constants['N'] * constants['LF_ha'] * constants['T_h'] * \ + constants['EF_a_{0}'.format(pollutant)] + self.logger.write_time_log('ShippingPortSector', 'calculate_yearly_emissions_by_port_vessel', + timeit.default_timer() - spent_time) + + return manoeuvring, hoteling + + def dates_to_month_weekday_hour(self, dataframe): + """ + Add 'month', 'weekday' and 'hour' columns to the given dataframe. + + :param dataframe: DataFrame where add the 'month', 'weekday' and 'hour' columns. + :type dataframe: DataFrame + + :return: DataFrame with the 'month', 'weekday' and 'hour' columns. 
+ :rtype: DataFrame + """ + spent_time = timeit.default_timer() + dataframe['month'] = dataframe['date'].dt.month + dataframe['weekday'] = dataframe['date'].dt.weekday + dataframe['hour'] = dataframe['date'].dt.hour + self.logger.write_time_log('ShippingPortSector', 'dates_to_month_weekday_hour', + timeit.default_timer() - spent_time) + + return dataframe + + def calculate_monthly_emissions_by_port(self, dataframe): + """ + Calculate the monthly emissions by port. + + :param dataframe: DataFrame with the yearly emissions by port and vessel. + :type dataframe: DataFrame + + :return: + """ + spent_time = timeit.default_timer() + + def get_mf(df): + """ + Get the Monthly Factor for the given dataframe depending on the vessel and the month. + + :param df: DataFrame where find the monthly factor. df.name is (vessel, month) + :type df: DataFrame + + :return: DataFrame with only the MF column. + :rtype: DataFrame + """ + vessel = df.name[0] + month = df.name[1] + + if vessel not in list(np.unique(self.monthly_profiles['type'].values)): + vessel = 'default' + mf_df = self.monthly_profiles.loc[self.monthly_profiles['type'] == vessel, ['code', month]] + mf_df.rename(columns={month: 'MF'}, inplace=True) + mf_df.set_index('code', inplace=True) + df = df.join(mf_df, how='inner') + + return df.loc[:, ['MF']] + dataframe['MF'] = dataframe.groupby(['vessel', 'month']).apply(get_mf) + dataframe[self.source_pollutants] = dataframe[self.source_pollutants].multiply(dataframe['MF'], axis=0) + dataframe.drop(['MF', 'month'], axis=1, inplace=True) + + operations = {x: 'sum' for x in self.source_pollutants} + operations['weekday'] = 'max' + operations['hour'] = 'max' + operations['date'] = 'max' + dataframe = dataframe.groupby(level=['code', 'tstep']).agg(operations) + self.logger.write_time_log('ShippingPortSector', 'calculate_monthly_emissions_by_port', + timeit.default_timer() - spent_time) + + return dataframe + + def calculate_hourly_emissions_by_port(self, dataframe): + """ + Calcualte the hourly emissions by port. + + :param dataframe: DataFrame with the Monthly emissions by port. + :type dataframe: DataFrame + + :return: Hourly emissions DataFrame + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + def get_wf(df): + """ + Get the Weekly Factor for the given dataframe depending on the date. + + :param df: DataFrame where find the weekly factor. df.name is the date. + :type df: DataFrame + + :return: DataFrame with only the WF column. + :rtype: DataFrame + """ + weekly_profile = self.calculate_rebalanced_weekly_profile(self.weekly_profiles.loc['default', :].to_dict(), + df.name) + df['WF'] = weekly_profile[df.name.weekday()] + return df.loc[:, ['WF']] + + def get_hf(df): + """ + Get the Hourly Factor for the given dataframe depending on the hour. + + :param df: DataFrame where find the hourly factor. df.name is the hour. + :type df: DataFrame + + :return: DataFrame with only the HF column. 
+ :rtype: DataFrame + """ + hourly_profile = self.hourly_profiles.loc['default', :].to_dict() + hour_factor = hourly_profile[df.name] + + df['HF'] = hour_factor + return df.loc[:, ['HF']] + + dataframe['date_as_date'] = dataframe['date'].dt.date + + dataframe['WF'] = dataframe.groupby('date_as_date').apply(get_wf) + dataframe[self.source_pollutants] = dataframe[self.source_pollutants].multiply(dataframe['WF'], axis=0) + dataframe.drop(columns=['weekday', 'date', 'date_as_date', 'WF'], inplace=True) + + dataframe['HF'] = dataframe.groupby('hour').apply(get_hf) + dataframe[self.source_pollutants] = dataframe[self.source_pollutants].multiply(dataframe['HF'], axis=0) + dataframe.drop(columns=['hour', 'HF'], inplace=True) + self.logger.write_time_log('ShippingPortSector', 'calculate_hourly_emissions_by_port', + timeit.default_timer() - spent_time) + + return dataframe + + def to_port_geometry(self, dataframe, shapefile_path): + """ + Add the geometry to the emissions based on the weight of the ports. + + :param dataframe: DataFrame with the hourly emissions. + :type dataframe: padas.DataFrame + + :param shapefile_path: Path to the shapefile with the port geometries and their weights. + :type shapefile_path: str + + :return: + """ + spent_time = timeit.default_timer() + + def normalize_weight(df): + df['Weight'] = df['Weight'] / df['Weight'].sum() + return df.loc[:, ['Weight']] + + shapefile = gpd.read_file(shapefile_path) + shapefile = shapefile.loc[shapefile['Weight'] > 0, ['code', 'Weight', 'geometry']] + + shapefile['Weight'] = shapefile.groupby('code').apply(normalize_weight) + + shapefile.set_index('code', inplace=True) + dataframe.reset_index(inplace=True) + dataframe.set_index('code', inplace=True) + + dataframe = shapefile.join(dataframe, how='outer') + + dataframe[self.source_pollutants] = dataframe[self.source_pollutants].multiply(dataframe['Weight'], axis=0) + dataframe.drop(columns=['Weight'], inplace=True) + self.logger.write_time_log('ShippingPortSector', 'to_port_geometry', timeit.default_timer() - spent_time) + + return dataframe + + def to_grid_geometry(self, dataframe): + """ + Regrid the emissions from port geometries to grid geometries. + + :param dataframe: DataFrame with the hourly emissions distributed by port. + :type dataframe: GeoDataFrame + + :return: DataFrame with the hourly emissions distributed by grid cell. + :rtype: GeoDataFrame + """ + spent_time = timeit.default_timer() + + dataframe.reset_index(inplace=True) + dataframe.drop(columns=['code'], inplace=True) + + dataframe.to_crs(self.grid_shp.crs, inplace=True) + dataframe['src_inter_fraction'] = dataframe.geometry.area + dataframe = self.spatial_overlays(dataframe, self.grid_shp, how='intersection') + dataframe['src_inter_fraction'] = dataframe.geometry.area / dataframe['src_inter_fraction'] + + dataframe[self.source_pollutants] = dataframe[self.source_pollutants].multiply(dataframe["src_inter_fraction"], + axis=0) + dataframe.rename(columns={'idx2': 'FID'}, inplace=True) + + dataframe.drop(columns=['src_inter_fraction', 'idx1', 'geometry'], inplace=True) + dataframe['layer'] = 0 + dataframe = dataframe.loc[:, ~dataframe.columns.duplicated()] + dataframe = dataframe.groupby(['FID', 'layer', 'tstep']).sum() + self.logger.write_time_log('ShippingPortSector', 'to_grid_geometry', timeit.default_timer() - spent_time) + + return dataframe + + def calculate_emissions(self): + """ + Main function to calculate the shipping port emissions. + + :return: Shipping port emissions with 'FID', 'layer' and 'tstep' index. 
+ :rtype: padas.DataFrame + """ + spent_time = timeit.default_timer() + self.logger.write_log('\tCalculating emissions') + + self.logger.write_log('\t\tCalculating yearly emissions', message_level=2) + manoeuvring, hoteling = self.calculate_yearly_emissions_by_port_vessel() + # print manoeuvring.reset_index().groupby('code').sum() + # print hoteling.reset_index().groupby('code').sum() + manoeuvring = self.add_timezone(manoeuvring, self.maneuvering_shapefile_path) + hoteling = self.add_timezone(hoteling, self.hoteling_shapefile_path) + + manoeuvring = self.add_dates(manoeuvring) + hoteling = self.add_dates(hoteling) + + manoeuvring = self.dates_to_month_weekday_hour(manoeuvring) + hoteling = self.dates_to_month_weekday_hour(hoteling) + + self.logger.write_log('\t\tCalculating monthly emissions', message_level=2) + + manoeuvring = self.calculate_monthly_emissions_by_port(manoeuvring) + hoteling = self.calculate_monthly_emissions_by_port(hoteling) + + self.logger.write_log('\t\tCalculating hourly emissions', message_level=2) + + manoeuvring = self.calculate_hourly_emissions_by_port(manoeuvring) + hoteling = self.calculate_hourly_emissions_by_port(hoteling) + + # TODO pre-calculate distribution during initialization. + self.logger.write_log('\t\tDistributing emissions', message_level=2) + + manoeuvring = self.to_port_geometry(manoeuvring, self.maneuvering_shapefile_path) + hoteling = self.to_port_geometry(hoteling, self.hoteling_shapefile_path) + + manoeuvring = self.to_grid_geometry(manoeuvring) + hoteling = self.to_grid_geometry(hoteling) + + dataframe = pd.concat([manoeuvring, hoteling]) + dataframe = dataframe.groupby(['FID', 'layer', 'tstep']).sum() + + dataframe = self.speciate(dataframe, 'default') + + self.logger.write_log('\t\tShipping port emissions calculated', message_level=2) + self.logger.write_time_log('ShippingPortSector', 'calculate_emissions', timeit.default_timer() - spent_time) + return dataframe diff --git a/hermesv3_bu/sectors/traffic_area_sector.py b/hermesv3_bu/sectors/traffic_area_sector.py new file mode 100755 index 0000000000000000000000000000000000000000..023f2405c502ccb6ca0b0ea0c6f7f206d77e2fb3 --- /dev/null +++ b/hermesv3_bu/sectors/traffic_area_sector.py @@ -0,0 +1,475 @@ +#!/usr/bin/env python + +import sys +import os +import timeit +import geopandas as gpd +import pandas as pd +import numpy as np +from hermesv3_bu.sectors.sector import Sector +from hermesv3_bu.io_server.io_shapefile import IoShapefile +from hermesv3_bu.io_server.io_netcdf import IoNetcdf + +pmc_list = ['pmc', 'PMC'] + + +class TrafficAreaSector(Sector): + def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + population_tiff_path, speciation_map_path, molecular_weights_path, + do_evaporative, gasoline_path, total_pop_by_prov, nuts_shapefile, speciation_profiles_evaporative, + evaporative_ef_file, temperature_dir, + do_small_cities, small_cities_shp, speciation_profiles_small_cities, small_cities_ef_file, + small_cities_monthly_profile, small_cities_weekly_profile, small_cities_hourly_profile): + spent_time = timeit.default_timer() + logger.write_log('===== TRAFFIC AREA SECTOR =====') + + super(TrafficAreaSector, self).__init__( + comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + None, None, None, speciation_map_path, None, molecular_weights_path) + + self.do_evaporative = do_evaporative + self.temperature_dir = temperature_dir + self.speciation_profiles_evaporative = 
self.read_speciation_profiles(speciation_profiles_evaporative) + self.evaporative_ef_file = evaporative_ef_file + if do_evaporative: + self.evaporative = self.init_evaporative(population_tiff_path, nuts_shapefile, gasoline_path, + total_pop_by_prov) + else: + self.evaporative = None + + self.do_small_cities = do_small_cities + self.speciation_profiles_small_cities = self.read_speciation_profiles(speciation_profiles_small_cities) + self.small_cities_ef_file = small_cities_ef_file + self.small_cities_monthly_profile = self.read_monthly_profiles(small_cities_monthly_profile) + self.small_cities_weekly_profile = self.read_weekly_profiles(small_cities_weekly_profile) + self.small_cities_hourly_profile = self.read_hourly_profiles(small_cities_hourly_profile) + if do_small_cities: + self.small_cities = self.init_small_cities(population_tiff_path, small_cities_shp) + else: + self.small_cities = None + + self.logger.write_time_log('TrafficAreaSector', '__init__', timeit.default_timer() - spent_time) + + def init_evaporative(self, global_path, provinces_shapefile, gasoline_path, total_pop_by_prov): + spent_time = timeit.default_timer() + + if self.comm.Get_rank() == 0: + if not os.path.exists(os.path.join(self.auxiliary_dir, 'traffic_area', 'vehicle_by_cell.shp')): + pop = self.get_clipped_population( + global_path, os.path.join(self.auxiliary_dir, 'traffic_area', 'population.shp')) + pop = self.make_population_by_nuts( + pop, provinces_shapefile, os.path.join(self.auxiliary_dir, 'traffic_area', 'pop_NUT.shp'), + write_file=False) + pop = self.make_population_by_nuts_cell( + pop, os.path.join(self.auxiliary_dir, 'traffic_area', 'pop_NUT_cell.shp')) + + veh_cell = self.make_vehicles_by_cell( + pop, gasoline_path, pd.read_csv(total_pop_by_prov), + os.path.join(self.auxiliary_dir, 'traffic_area', 'vehicle_by_cell.shp')) + else: + veh_cell = IoShapefile(self.comm).read_shapefile_serial( + os.path.join(self.auxiliary_dir, 'traffic_area', 'vehicle_by_cell.shp')) + else: + veh_cell = None + + veh_cell = IoShapefile(self.comm).split_shapefile(veh_cell) + + self.logger.write_time_log('TrafficAreaSector', 'init_evaporative', timeit.default_timer() - spent_time) + return veh_cell + + def init_small_cities(self, global_path, small_cities_shapefile): + spent_time = timeit.default_timer() + if self.comm.Get_rank() == 0: + if not os.path.exists(os.path.join(self.auxiliary_dir, 'traffic_area', 'pop_SMALL_cell.shp')): + pop = self.get_clipped_population( + global_path, os.path.join(self.auxiliary_dir, 'traffic_area', 'population.shp')) + pop = self.make_population_by_nuts( + pop, small_cities_shapefile, os.path.join(self.auxiliary_dir, 'traffic_area', 'pop_SMALL.shp'), + write_file=False) + pop = self.make_population_by_nuts_cell( + pop, os.path.join(self.auxiliary_dir, 'traffic_area', 'pop_SMALL_cell.shp')) + else: + pop = IoShapefile(self.comm).read_shapefile_serial( + os.path.join(self.auxiliary_dir, 'traffic_area', 'pop_SMALL_cell.shp')) + else: + pop = None + pop = IoShapefile(self.comm).split_shapefile(pop) + + self.logger.write_time_log('TrafficAreaSector', 'init_small_cities', timeit.default_timer() - spent_time) + return pop + + def get_clipped_population(self, global_path, population_shapefile_path): + from hermesv3_bu.io_server.io_raster import IoRaster + spent_time = timeit.default_timer() + + if not os.path.exists(population_shapefile_path): + population_density = IoRaster(self.comm).clip_raster_with_shapefile_poly( + global_path, self.clip.shapefile, + os.path.join(self.auxiliary_dir, 'traffic_area', 
'population.tif')) + population_density = IoRaster(self.comm).to_shapefile_serie(population_density) + else: + population_density = IoShapefile(self.comm).read_shapefile_serial(population_shapefile_path) + + self.logger.write_time_log('TrafficAreaSector', 'get_clipped_population', timeit.default_timer() - spent_time) + + return population_density + + def make_population_by_nuts(self, population_shape, nut_shp, pop_by_nut_path, write_file=True, csv_path=None, + column_id='ORDER07'): + spent_time = timeit.default_timer() + + if not os.path.exists(pop_by_nut_path): + nut_df = IoShapefile(self.comm).read_shapefile_serial(nut_shp) + population_shape['area_in'] = population_shape.geometry.area + df = gpd.overlay(population_shape, nut_df.to_crs(population_shape.crs), how='intersection') + df.crs = population_shape.crs + df.loc[:, 'data'] = df['data'] * (df.geometry.area / df['area_in']) + del df['area_in'] + if write_file: + IoShapefile(self.comm).write_shapefile_serial(df, pop_by_nut_path) + if csv_path is not None: + df = df.loc[:, ['data', column_id]].groupby(column_id).sum() + df.to_csv(csv_path) + else: + df = IoShapefile(self.comm).read_shapefile_serial(pop_by_nut_path) + + self.logger.write_time_log('TrafficAreaSector', 'make_population_by_nuts', timeit.default_timer() - spent_time) + return df + + def make_population_by_nuts_cell(self, pop_by_nut, pop_nut_cell_path, write_file=True): + spent_time = timeit.default_timer() + + if not os.path.exists(pop_nut_cell_path): + + pop_by_nut = pop_by_nut.to_crs(self.grid_shp.crs) + + del pop_by_nut['NAME'] + pop_by_nut['area_in'] = pop_by_nut.geometry.area + + # df = gpd.overlay(pop_by_nut, grid_shp, how='intersection') + df = self.spatial_overlays(pop_by_nut, self.grid_shp.reset_index(), how='intersection') + + df.crs = self.grid_shp.crs + df.loc[:, 'data'] = df['data'] * (df.geometry.area / df['area_in']) + del pop_by_nut['area_in'] + if write_file: + IoShapefile(self.comm).write_shapefile_serial(df, pop_nut_cell_path) + else: + df = IoShapefile(self.comm).read_shapefile_serial(pop_nut_cell_path) + + self.logger.write_time_log('TrafficAreaSector', 'make_population_by_nuts_cell', + timeit.default_timer() - spent_time) + return df + + def make_vehicles_by_cell(self, pop_nut_cell, gasoline_path, total_pop_by_nut, veh_by_cell_path, + column_id='ORDER07'): + spent_time = timeit.default_timer() + + if not os.path.exists(veh_by_cell_path): + + total_pop_by_nut.loc[:, column_id] = total_pop_by_nut[column_id].astype(np.int16) + pop_nut_cell.loc[:, column_id] = pop_nut_cell[column_id].astype(np.int16) + + df = pop_nut_cell.merge(total_pop_by_nut, left_on=column_id, right_on=column_id, how='left') + + df['pop_percent'] = df['data_x'] / df['data_y'] + del df['data_x'], df['data_y'], df['CELL_ID'] + + gas_df = pd.read_csv(gasoline_path, index_col='COPERT_V_name').transpose() + vehicle_type_list = list(gas_df.columns.values) + gas_df.loc[:, column_id] = gas_df.index.astype(np.int16) + + df = df.merge(gas_df, left_on=column_id, right_on=column_id, how='left') + for vehicle_type in vehicle_type_list: + df.loc[:, vehicle_type] = df[vehicle_type] * df['pop_percent'] + + del df['pop_percent'], df[column_id] + + aux_df = df.loc[:, ['FID'] + vehicle_type_list].groupby('FID').sum() + aux_df.loc[:, 'FID'] = aux_df.index + + geom = self.grid_shp.loc[aux_df.index, 'geometry'] + + df = gpd.GeoDataFrame(aux_df, geometry=geom, crs=pop_nut_cell.crs) + IoShapefile(self.comm).write_shapefile_serial(df, veh_by_cell_path) + else: + df = 
IoShapefile(self.comm).read_shapefile_serial(veh_by_cell_path) + + self.logger.write_time_log('TrafficAreaSector', 'make_vehicles_by_cell', timeit.default_timer() - spent_time) + return df + + def get_profiles_from_temperature(self, temperature, default=False): + spent_time = timeit.default_timer() + + temperature = temperature.copy() + if default: + default_profile = np.array( + [0.025, 0.025, 0.025, 0.025, 0.025, 0.027083, 0.03125, 0.0375, 0.045833, 0.05625, 0.060417, 0.066667, + 0.06875, 0.072917, 0.070833, 0.064583, 0.05625, 0.045833, 0.0375, 0.03125, 0.027083, 0.025, 0.025, + 0.025]) + for x in xrange(24): + temperature['t_{0}'.format(x)] = default_profile[x] + + else: + temp_list = ['t_{0}'.format(x) for x in xrange(24)] + temperature.loc[:, temp_list] = temperature[temp_list] + 273.15 + + temperature.loc[:, temp_list] = temperature[temp_list].subtract(temperature[temp_list].min(axis=1), axis=0) + + temperature.loc[:, temp_list] = temperature[temp_list].div( + temperature[temp_list].max(axis=1) - temperature[temp_list].min(axis=1), axis=0) + + aux = temperature[temp_list].replace({0: np.nan}) + second_min = aux[temp_list].min(axis=1) + + temperature.loc[:, temp_list] = temperature[temp_list].add(second_min, axis=0) + temperature.loc[:, temp_list] = temperature[temp_list].div(temperature[temp_list].sum(axis=1), axis=0) + + self.logger.write_time_log('TrafficAreaSector', 'get_profiles_from_temperature', + timeit.default_timer() - spent_time) + return temperature + + def calculate_evaporative_emissions(self): + spent_time = timeit.default_timer() + + veh_list = list(self.evaporative.columns.values) + veh_list.remove('FID') + veh_list.remove('geometry') + if 'T_REC' in veh_list: + veh_list.remove('T_REC') + + crs = self.evaporative.crs + geom = self.evaporative.geometry + + # get average daily temperature by cell + aux_df = self.evaporative.loc[:, 'geometry'].to_crs({'init': 'epsg:4326'}) + self.evaporative['c_lat'] = aux_df.centroid.y + self.evaporative['c_lon'] = aux_df.centroid.x + self.evaporative['centroid'] = aux_df.centroid + + temperature = IoNetcdf(self.comm).get_hourly_data_from_netcdf( + self.evaporative['c_lon'].min(), self.evaporative['c_lon'].max(), self.evaporative['c_lat'].min(), + self.evaporative['c_lat'].max(), self.temperature_dir, 'tas', self.date_array) + temperature.rename(columns={x: 't_{0}'.format(x) for x in xrange(len(self.date_array))}, inplace=True) + # From Kelvin to Celsius degrees + temperature.loc[:, ['t_{0}'.format(x) for x in xrange(len(self.date_array))]] = \ + temperature.loc[:, ['t_{0}'.format(x) for x in xrange(len(self.date_array))]] - 273.15 + + temperature_mean = gpd.GeoDataFrame(temperature[['t_{0}'.format(x) for x in + xrange(len(self.date_array))]].mean(axis=1), + columns=['temp'], geometry=temperature.geometry) + temperature_mean['REC'] = temperature['REC'] + + if 'T_REC' not in self.evaporative.columns.values: + self.evaporative['T_REC'] = self.evaporative.apply(self.nearest, geom_union=temperature_mean.unary_union, + df1=self.evaporative, df2=temperature_mean, + geom1_col='centroid', src_column='REC', axis=1) + del self.evaporative['c_lat'], self.evaporative['c_lon'], self.evaporative['centroid'] + IoShapefile(self.comm).write_shapefile_serial( + self.evaporative, os.path.join(self.auxiliary_dir, 'traffic_area', 'vehicle_by_cell.shp')) + else: + del self.evaporative['c_lat'], self.evaporative['c_lon'], self.evaporative['centroid'] + + self.evaporative = self.evaporative.merge(temperature_mean, left_on='T_REC', right_on='REC', how='left') + 
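# Illustrative sketch, assuming pandas and numpy plus hypothetical band values: the loop
# that follows assigns every cell the evaporative emission factor whose temperature band
# [Tmin, Tmax) contains the cell's mean temperature, scaled as EFbase * TF. The same band
# lookup in a minimal, self-contained form:

import numpy as np
import pandas as pd

cells = pd.DataFrame({'temp': [8.0, 17.5, 28.0]})          # mean daily temperature per cell (deg C)
ef_bands = pd.DataFrame({'Tmin': [-999.0, 10.0, 20.0],     # open-ended limits filled with -999/999
                         'Tmax': [10.0, 20.0, 999.0],
                         'EFbase': [0.10, 0.25, 0.60],     # hypothetical base emission factors
                         'TF': [1.0, 1.1, 1.3]})           # hypothetical temperature corrections

cells['EF'] = np.nan
for _, band in ef_bands.iterrows():
    in_band = (cells['temp'] >= band['Tmin']) & (cells['temp'] < band['Tmax'])
    cells.loc[in_band, 'EF'] = band['EFbase'] * band['TF']
# cells['EF'] is now [0.10, 0.275, 0.78]; multiplying it by the vehicle count of each cell
# gives that cell's evaporative emission before speciation and temporal disaggregation.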
+ ef_df = pd.read_csv(self.evaporative_ef_file, sep=',') + ef_df.drop(columns=['canister', 'Copert_V_name'], inplace=True) + ef_df.loc[ef_df['Tmin'].isnull(), 'Tmin'] = -999 + ef_df.loc[ef_df['Tmax'].isnull(), 'Tmax'] = 999 + + for vehicle_type in veh_list: + + self.evaporative['EF'] = np.nan + ef_aux = ef_df.loc[ef_df['Code'] == vehicle_type] + for i, line in ef_aux.iterrows(): + self.evaporative.loc[(self.evaporative['temp'] < line.get('Tmax')) & + (self.evaporative['temp'] >= line.get('Tmin')), 'EF'] = \ + line.get('EFbase') * line.get('TF') + + self.evaporative.loc[:, vehicle_type] = self.evaporative[vehicle_type] * self.evaporative['EF'] + + self.evaporative.loc[:, 'nmvoc'] = self.evaporative.loc[:, veh_list].sum(axis=1) + self.evaporative = gpd.GeoDataFrame(self.evaporative.loc[:, ['nmvoc', 'T_REC', 'FID']], geometry=geom, crs=crs) + + self.evaporative = self.speciate_evaporative() + + self.evaporative = self.evaporative_temporal_distribution(self.get_profiles_from_temperature(temperature)) + + self.evaporative.set_index(['FID', 'tstep'], inplace=True) + + self.logger.write_time_log('TrafficAreaSector', 'calculate_evaporative_emissions', + timeit.default_timer() - spent_time) + return self.evaporative + + def evaporative_temporal_distribution(self, temporal_profiles): + spent_time = timeit.default_timer() + + aux = self.evaporative.merge(temporal_profiles, left_on='T_REC', right_on='REC', how='left') + + temporal_df_list = [] + pollutant_list = [e for e in self.evaporative.columns.values if e not in ('T_REC', 'FID', 'geometry')] + + for tstep, date in enumerate(self.date_array): + aux_temporal = aux[pollutant_list].multiply(aux['t_{0}'.format(date.hour)], axis=0) + aux_temporal['FID'] = aux['FID'] + aux_temporal['tstep'] = tstep + temporal_df_list.append(aux_temporal) + df = pd.concat(temporal_df_list) + + self.logger.write_time_log('TrafficAreaSector', 'evaporative_temporal_distribution', + timeit.default_timer() - spent_time) + return df + + def speciate_evaporative(self): + spent_time = timeit.default_timer() + + speciated_df = self.evaporative.drop(columns=['nmvoc']) + out_p_list = [out_p for out_p, in_p_aux in self.speciation_map.iteritems() if in_p_aux == 'nmvoc'] + + for p in out_p_list: + # From g/day to mol/day + speciated_df[p] = self.evaporative['nmvoc'] * self.speciation_profiles_evaporative.loc['default', p] + + self.logger.write_time_log('TrafficAreaSector', 'speciate_evaporative', timeit.default_timer() - spent_time) + return speciated_df + + def small_cities_emissions_by_population(self, df): + spent_time = timeit.default_timer() + + df = df.loc[:, ['data', 'FID']].groupby('FID').sum() + # print pop_nut_cell + ef_df = pd.read_csv(self.small_cities_ef_file, sep=',') + # print ef_df + ef_df.drop(['Code', 'Copert_V_name'], axis=1, inplace=True) + for pollutant in ef_df.columns.values: + # print ef_df[pollutant].iloc[0] + df[pollutant] = df['data'] * ef_df[pollutant].iloc[0] + df.drop('data', axis=1, inplace=True) + + self.logger.write_time_log('TrafficAreaSector', 'small_cities_emissions_by_population', + timeit.default_timer() - spent_time) + return df + + def add_timezones(self, grid, default=False): + from timezonefinder import TimezoneFinder + spent_time = timeit.default_timer() + + if default: + grid['timezone'] = 'Europe/Madrid' + else: + tz = TimezoneFinder() + aux_grid = grid.to_crs({'init': 'epsg:4326'}) + aux_grid['lats'] = aux_grid.geometry.centroid.y + aux_grid['lons'] = aux_grid.geometry.centroid.x + inc = 1 + + while len(grid.loc[grid['timezone'] == '', :]) 
> 0: + print len(grid.loc[grid['timezone'] == '', :]) + grid.loc[grid['timezone'] == '', 'timezone'] = aux_grid.loc[grid['timezone'] == '', :].apply( + lambda x: tz.closest_timezone_at(lng=x['lons'], lat=x['lats'], delta_degree=inc), axis=1) + inc += 1 + + self.logger.write_time_log('TrafficAreaSector', 'add_timezones', timeit.default_timer() - spent_time) + return grid + + def temporal_distribution_small(self, small_cities): + import pytz + spent_time = timeit.default_timer() + + p_names = small_cities.columns.values + + aux_grid = self.grid_shp.loc[self.grid_shp['FID'].isin(small_cities.index.values), :] + + aux_grid = self.add_timezone(aux_grid) + aux_grid.set_index('FID', inplace=True) + + small_cities = small_cities.merge(aux_grid.loc[:, ['timezone']], left_index=True, right_index=True, + how='left') + small_cities.loc[:, 'utc'] = self.date_array[0] + small_cities['date'] = small_cities.groupby('timezone')['utc'].apply( + lambda x: pd.to_datetime(x).dt.tz_localize(pytz.utc).dt.tz_convert(x.name).dt.tz_localize(None)) + small_cities.drop(['utc', 'timezone'], inplace=True, axis=1) + # print small_cities + + df_list = [] + for tstep in xrange(len(self.date_array)): + small_cities['month'] = small_cities['date'].dt.month + small_cities['weekday'] = small_cities['date'].dt.dayofweek + small_cities['hour'] = small_cities['date'].dt.hour + small_cities.loc[small_cities['weekday'] <= 4, 'day_type'] = 'Weekday' + small_cities.loc[small_cities['weekday'] == 5, 'day_type'] = 'Saturday' + small_cities.loc[small_cities['weekday'] == 6, 'day_type'] = 'Sunday' + + for i, aux in small_cities.groupby(['month', 'weekday', 'hour', 'day_type']): + aux_date = pd.Timestamp(aux['date'].values[0]) + + balanced_weekly_profile = self.calculate_rebalanced_weekly_profile( + self.small_cities_weekly_profile.loc['default', :].to_dict(), aux_date) + small_cities.loc[aux.index, 'f'] = self.small_cities_monthly_profile.loc['default', i[0]] * \ + balanced_weekly_profile[i[1]] * self.small_cities_hourly_profile.loc[i[3], i[2]] + + aux_df = small_cities.loc[:, p_names].multiply(small_cities['f'], axis=0) + aux_df['tstep'] = tstep + aux_df.set_index('tstep', append=True, inplace=True) + df_list.append(aux_df) + + small_cities['date'] = small_cities['date'] + pd.to_timedelta(1, unit='h') + df = pd.concat(df_list) + + self.logger.write_time_log('TrafficAreaSector', 'temporal_distribution_small', + timeit.default_timer() - spent_time) + return df + + def calculate_small_cities_emissions(self): + spent_time = timeit.default_timer() + + # EF + self.small_cities = self.small_cities_emissions_by_population(self.small_cities) + + # Spectiacion + self.speciation_profile = self.speciation_profiles_small_cities + self.small_cities = self.speciate(self.small_cities) + # From kmol/h or kg/h to mol/h or g/h + self.small_cities = self.small_cities.mul(1000.0) + + # Temporal + # grid = self.add_timezones(gpd.read_file(os.path.join(self.auxiliary_dir, 'shapefile', 'grid_shapefile.shp')), + # default=True) + self.small_cities = self.temporal_distribution_small(self.small_cities) + + self.logger.write_time_log('TrafficAreaSector', 'calculate_small_cities_emissions', + timeit.default_timer() - spent_time) + + return True + + def to_grid(self): + spent_time = timeit.default_timer() + + if self.do_evaporative and self.do_small_cities: + dataset = pd.concat([self.evaporative, self.small_cities]) + elif self.do_evaporative: + dataset = self.evaporative + elif self.do_small_cities: + dataset = self.small_cities + else: + raise ValueError('No 
traffic area emission selected. do_evaporative and do_small_cities are False') + + dataset['layer'] = 0 + dataset = dataset.groupby(['FID', 'layer', 'tstep']).sum() + + self.logger.write_time_log('TrafficAreaSector', 'to_grid', timeit.default_timer() - spent_time) + return dataset + + def calculate_emissions(self): + spent_time = timeit.default_timer() + + if self.do_evaporative: + self.calculate_evaporative_emissions() + if self.do_small_cities: + self.calculate_small_cities_emissions() + + emissions = self.to_grid() + + self.logger.write_log('\t\tTraffic area emissions calculated', message_level=2) + self.logger.write_time_log('TrafficAreaSector', 'calculate_emissions', timeit.default_timer() - spent_time) + return emissions diff --git a/hermesv3_bu/sectors/traffic_sector.py b/hermesv3_bu/sectors/traffic_sector.py new file mode 100755 index 0000000000000000000000000000000000000000..2df4f77c2a3908fd08bb51c76906d9079f0df87f --- /dev/null +++ b/hermesv3_bu/sectors/traffic_sector.py @@ -0,0 +1,1366 @@ +#!/usr/bin/env python +import sys +import os +import timeit + +import pandas as pd +import geopandas as gpd +import numpy as np +from datetime import timedelta +import warnings +from hermesv3_bu.logger.log import Log +from hermesv3_bu.sectors.sector import Sector +from hermesv3_bu.io_server.io_netcdf import IoNetcdf + +from ctypes import cdll, CDLL +cdll.LoadLibrary("libc.so.6") +libc = CDLL("libc.so.6") +libc.malloc_trim(0) + +MIN_RAIN = 0.254 # After USEPA (2011) +RECOVERY_RATIO = 0.0872 # After Amato et al. (2012) + + +aerosols = ['oc', 'ec', 'pno3', 'pso4', 'pmfine', 'pmc', 'poa', 'poc', 'pec', 'pcl', 'pnh4', 'pna', 'pmg', 'pk', 'pca', + 'pncom', 'pfe', 'pal', 'psi', 'pti', 'pmn', 'ph2o', 'pmothr'] +pmc_list = ['pmc', 'PMC'] +rline_shp = False + + +class TrafficSector(Sector): + # TODO MARC -> to revise these descriptions + """ + The traffic class does have all the necessary functions to calculate the traffic emission in bottom-up mode. + + Part of the traffic emissions are calculated by roadlink (hot, cold, road wear, tyre wear, brake wear and + resuspension) differentiating by vehicle type. + The other emissions (other cities and evaporative) are calculated by cell instead of by road link. + + To calculate the traffic emissions some input files are needed as the shapefile that contains the information and + geolocalization of each road link, the temporal proxies, the emission factors files and also the information + relative to the timesteps. 
+ """ + + def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + road_link_path, fleet_compo_path, speed_hourly_path, monthly_profiles_path, weekly_profiles_path, + hourly_mean_profiles_path, hourly_weekday_profiles_path, hourly_saturday_profiles_path, + hourly_sunday_profiles_path, ef_common_path, vehicle_list=None, load=0.5, speciation_map_path=None, + hot_cold_speciation=None, tyre_speciation=None, road_speciation=None, brake_speciation=None, + resuspension_speciation=None, temp_common_path=None, output_dir=None, molecular_weights_path=None, + resuspension_correction=True, precipitation_path=None, do_hot=True, do_cold=True, do_tyre_wear=True, + do_brake_wear=True, do_road_wear=True, do_resuspension=True, write_rline=False): + + spent_time = timeit.default_timer() + logger.write_log('===== TRAFFIC SECTOR =====') + super(TrafficSector, self).__init__( + comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + monthly_profiles_path, weekly_profiles_path, None, speciation_map_path, None, molecular_weights_path) + + self.resuspension_correction = resuspension_correction + self.precipitation_path = precipitation_path + + self.output_dir = output_dir + + self.link_to_grid_csv = os.path.join(auxiliary_dir, 'traffic', 'link_grid.csv') + if self.comm.Get_rank() == 0: + if not os.path.exists(os.path.dirname(self.link_to_grid_csv)): + os.makedirs(os.path.dirname(self.link_to_grid_csv)) + self.comm.Barrier() + self.crs = None # crs is the projection of the road links and it is set on the read_road_links function. + self.write_rline = write_rline + self.road_links = self.read_road_links(road_link_path) + self.load = load + self.ef_common_path = ef_common_path + self.temp_common_path = temp_common_path + # TODO use only date_array + self.timestep_num = len(self.date_array) + self.timestep_freq = 1 + self.starting_date = self.date_array[0] + self.add_local_date(self.date_array[0]) + + self.hot_cold_speciation = hot_cold_speciation + self.tyre_speciation = tyre_speciation + self.road_speciation = road_speciation + self.brake_speciation = brake_speciation + self.resuspension_speciation = resuspension_speciation + + self.fleet_compo = self.read_fleet_compo(fleet_compo_path, vehicle_list) + self.speed_hourly = self.read_speed_hourly(speed_hourly_path) + + self.hourly_profiles = pd.concat([ + pd.read_csv(hourly_mean_profiles_path), + pd.read_csv(hourly_weekday_profiles_path), + pd.read_csv(hourly_saturday_profiles_path), + pd.read_csv(hourly_sunday_profiles_path) + ]).reset_index() + + self.expanded = self.expand_road_links('hourly', len(self.date_array), 1) + + del self.fleet_compo, self.speed_hourly, self.monthly_profiles, self.weekly_profiles, self.hourly_profiles + + self.do_hot = do_hot + self.do_cold = do_cold + self.do_tyre_wear = do_tyre_wear + self.do_brake_wear = do_brake_wear + self.do_road_wear = do_road_wear + self.do_resuspension = do_resuspension + + self.logger.write_time_log('TrafficSector', '__init__', timeit.default_timer() - spent_time) + + def read_speciation_map(self, path): + """ + Read the speciation map. + + The speciation map is the CSV file that contains the relation from the output pollutant and the correspondent + input pollutant associated. That file also contains a short description of the output pollutant and the units to + be stored. 
+ + e.g.: + dst,src,description,units + NOx,nox_no2,desc_no,mol.s-1 + SOx,so2,desc_so2,mol.s-1 + CO,co,desc_co,mol.s-1 + CO2,co2,desc_co2,mol.s-1 + NMVOC,nmvoc,desc_nmvoc,g.s-1 + PM10,pm10,desc_pm10,g.s-1 + PM25,pm25,desc_pm25,g.s-1 + PMC,,desc_pmc,g.s-1 + + :param path: Path to the speciation map file. + :type path: str + + :return: Dictionary with the output pollutant as key and the input pollutant as value. + :rtype: dict + """ + spent_time = timeit.default_timer() + speciation_map = pd.read_csv(path) + dataframe = pd.read_csv(path) + # input_pollutants = list(self.source_pollutants) + input_pollutants = ['nmvoc' if x == 'voc' else x for x in list(self.source_pollutants)] + if 'PMC' in dataframe['dst'].values and all(element in input_pollutants for element in ['pm']): + dataframe_aux = dataframe.loc[dataframe['src'].isin(input_pollutants), :] + dataframe = pd.concat([dataframe_aux, dataframe.loc[dataframe['dst'] == 'PMC', :]]) + else: + dataframe = dataframe.loc[dataframe['src'].isin(input_pollutants), :] + + dataframe = dict(zip(dataframe['dst'], dataframe['src'])) + if 'pm' in self.source_pollutants: + for out_p, in_p in zip(speciation_map[['dst']].values, speciation_map[['src']].values): + if in_p in ['pm10', 'pm25']: + dataframe[out_p[0]] = in_p[0] + # if 'pm' in self.source_pollutants and 'PM10' in speciation_map[['dst']].values: + # dataframe['PM10'] = 'pm10' + # if 'pm' in self.source_pollutants and 'PM25' in speciation_map[['dst']].values: + # dataframe['PM25'] = 'pm25' + self.logger.write_time_log('TrafficSector', 'read_speciation_map', timeit.default_timer() - spent_time) + + return dataframe + + def add_local_date(self, utc_date): + """ + Adds to the road links the starting date in local time. + This new column is called 'start_date'. + + :param utc_date: Starting date in UTC. + """ + import pytz + spent_time = timeit.default_timer() + + self.add_timezones() + self.road_links.loc[:, 'utc'] = utc_date + self.road_links['start_date'] = self.road_links.groupby('timezone')['utc'].apply( + lambda x: pd.to_datetime(x).dt.tz_localize(pytz.utc).dt.tz_convert(x.name).dt.tz_localize(None)) + + del self.road_links['utc'], self.road_links['timezone'] + + self.logger.write_time_log('TrafficSector', 'add_local_date', timeit.default_timer() - spent_time) + return True + + def add_timezones(self): + """ + Finds and sets the timezone for each road link. + """ + spent_time = timeit.default_timer() + # TODO calculate timezone from the centroid of each roadlink. + + self.road_links['timezone'] = 'Europe/Madrid' + + self.logger.write_time_log('TrafficSector', 'add_timezones', timeit.default_timer() - spent_time) + return True + + def read_speed_hourly(self, path): + # TODO complete description + """ + Reads the speed hourly file. + + :param path: Path to the speed hourly file. + :type path: str: + + :return: ... 
+ :rtype: Pandas.DataFrame + """ + spent_time = timeit.default_timer() + + df = pd.read_csv(path, sep=',', dtype=np.float32) + df['P_speed'] = df['P_speed'].astype(int) + # df.set_index('P_speed', inplace=True) + self.logger.write_time_log('TrafficSector', 'read_speed_hourly', timeit.default_timer() - spent_time) + return df + + def read_fleet_compo(self, path, vehicle_list): + spent_time = timeit.default_timer() + df = pd.read_csv(path, sep=',') + if vehicle_list is not None: + df = df.loc[df['Code'].isin(vehicle_list), :] + self.logger.write_time_log('TrafficSector', 'read_fleet_compo', timeit.default_timer() - spent_time) + return df + + def read_road_links(self, path): + def chunk_road_links(df, nprocs): + def index_marks(nrows, nprocs): + max_len = int(nrows // nprocs) + 1 + min_len = max_len - 1 + max_num = nrows % nprocs + min_num = nprocs - max_num + index_list = [] + prev = 0 + for i in xrange(max_num): + prev += max_len + index_list.append(prev) + if min_num > 0: + for i in xrange(min_num - 1): + prev += min_len + index_list.append(prev) + + return index_list + + def split(dfm, nprocs): + indices = index_marks(dfm.shape[0], nprocs) + return np.split(dfm, indices) + + chunks_aux = split(df, nprocs) + return chunks_aux + + spent_time = timeit.default_timer() + + if self.comm.Get_rank() == 0: + df = gpd.read_file(path) + + df = gpd.sjoin(df, self.clip.shapefile.to_crs(df.crs), how="inner", op='intersects') + + # Filtering road links to CONSiderate. + df['CONS'] = df['CONS'].astype(np.int16) + df = df[df['CONS'] != 0] + df = df[df['aadt'] > 0] + + # TODO Manu update shapefile replacing NULL values on 'aadt_m-mn' column + df = df.loc[df['aadt_m_mn'] != 'NULL', :] + + # Adding identificator of road link + df['Link_ID'] = xrange(len(df)) + + del df['Adminis'], df['CCAA'], df['CONS'], df['NETWORK_ID'] + del df['Province'], df['Road_name'] + + # Deleting unused columns + del df['aadt_m_sat'], df['aadt_m_sun'], df['aadt_m_wd'], df['Source'] + libc.malloc_trim(0) + chunks = chunk_road_links(df, self.comm.Get_size()) + else: + chunks = None + self.comm.Barrier() + + df = self.comm.scatter(chunks, root=0) + del chunks + libc.malloc_trim(0) + df = df.to_crs({'init': 'epsg:4326'}) + + self.crs = df.crs + + # Correcting percentages + df['PcMoto'] = df['PcMoto'] / 100 + df['PcHeavy'] = df['PcHeavy'] / 100 + df['PcMoped'] = df['PcMoped'] / 100 + df['PcLight'] = 1 - (df['PcMoto'] + df['PcHeavy'] + df['PcMoped']) + + # Road_type int to string + df['Road_type'] = df['Road_type'].astype(str) + df.loc[df['Road_type'] == '0', 'Road_type'] = 'Highway' + df.loc[df['Road_type'] == '1', 'Road_type'] = 'Rural' + df.loc[df['Road_type'] == '2', 'Road_type'] = 'Urban Off Peak' + df.loc[df['Road_type'] == '3', 'Road_type'] = 'Urban Peak' + + # TODO Read with units types + df['road_grad'] = df['road_grad'].astype(float) + + # Check if percents are ok + if len(df[df['PcLight'] < 0]) is not 0: + print 'ERROR: PcLight < 0' + exit(1) + + if self.write_rline: + self.write_rline_roadlinks(df) + + self.logger.write_time_log('TrafficSector', 'read_road_links', timeit.default_timer() - spent_time) + libc.malloc_trim(0) + return df + + def read_ef(self, emission_type, pollutant_name): + """ + Reads the file that contains the necessary emission factor for the current pollutant and emission type. + + Depending on the emission tyme the file contain different columns. + + :param emission_type: Type of the emission. It can be hot, cold, tyre, road, brake or resuspension. 
+ :type emission_type: str + + :param pollutant_name: Name of the pollutant to read their emission factor. + :type pollutant_name:str + + :return: Returns the readed emission factor in DataFrame mode. + :rtype: Pandas.DataFrame + """ + spent_time = timeit.default_timer() + + ef_path = os.path.join(self.ef_common_path, '{0}_{1}.csv'.format(emission_type, pollutant_name)) + df = self.read_profiles(ef_path) + + # Pollutants different to NH3 + if pollutant_name != 'nh3': + del df['Copert_V_name'] + + # For hot emission factors + if emission_type == 'hot': + df = df[(df['Load'] == self.load) | (df['Load'].isnull())] + + df.loc[df['Technology'].isnull(), 'Technology'] = '' + df = df[df['Technology'] != 'EGR'] + + del df['Technology'], df['Load'] + + # Split the EF file into small DataFrames divided by column Road.Slope and Mode restrictions. + df_code_slope_road = df[df['Road.Slope'].notnull() & df['Mode'].notnull()] + df_code_slope = df[df['Road.Slope'].notnull() & (df['Mode'].isnull())] + df_code_road = df[df['Road.Slope'].isnull() & (df['Mode'].notnull())] + df_code = df[df['Road.Slope'].isnull() & (df['Mode'].isnull())] + + # Checks that the splited DataFrames contain the full DataFrame + if (len(df_code_slope_road) + len(df_code_slope) + len(df_code_road) + len(df_code)) != len(df): + # TODO check that error + print 'ERROR in blablavbla' + + return df_code_slope_road, df_code_slope, df_code_road, df_code + elif emission_type == 'cold' or emission_type == 'tyre' or emission_type == 'road' or \ + emission_type == 'brake' or emission_type == 'resuspension': + return df + # NH3 pollutant + else: + del df['Copert_V_name'] + # Specific case for cold NH3 emission factors that needs the hot emission factors and the cold ones. + if emission_type == 'cold': + df_hot = self.read_ef('hot', pollutant_name) + df_hot.columns = [x + '_hot' for x in df_hot.columns.values] + + df = df.merge(df_hot, left_on=['Code', 'Mode'], right_on=['Code_hot', 'Mode_hot'], + how='left') + + del df['Cmileage_hot'], df['Mode_hot'], df['Code_hot'] + + return df + + self.logger.write_time_log('TrafficSector', 'read_ef', timeit.default_timer() - spent_time) + return None + + def read_mcorr_file(self, pollutant_name): + spent_time = timeit.default_timer() + try: + df_path = os.path.join(self.ef_common_path, 'mcorr_{0}.csv'.format(pollutant_name)) + + df = pd.read_csv(df_path, sep=',') + if 'Copert_V_name' in list(df.columns.values): + df.drop(columns=['Copert_V_name'], inplace=True) + except IOError: + self.logger.write_log('WARNING! 
No mileage correction applied to {0}'.format(pollutant_name)) + warnings.warn('No mileage correction applied to {0}'.format(pollutant_name)) + df = None + + self.logger.write_time_log('TrafficSector', 'read_ef', timeit.default_timer() - spent_time) + return df + + def calculate_precipitation_factor(self, lon_min, lon_max, lat_min, lat_max, precipitation_dir): + spent_time = timeit.default_timer() + + dates_to_extract = [self.date_array[0] + timedelta(hours=x - 47) for x in range(47)] + self.date_array + + precipitation = IoNetcdf(self.comm).get_hourly_data_from_netcdf( + lon_min, lon_max, lat_min, lat_max, precipitation_dir, 'prlr', dates_to_extract) + + precipitation.set_index('REC', inplace=True, drop=True) + + prlr = precipitation.drop(columns='geometry').values.T + + # From m/s to mm/h + prlr = prlr * (3600 * 1000) + prlr = prlr <= MIN_RAIN + dst = np.empty(prlr.shape) + last = np.zeros((prlr.shape[-1])) + for time in xrange(prlr.shape[0]): + dst[time, :] = (last + prlr[time, :]) * prlr[time, :] + last = dst[time, :] + + dst = dst[47:, :] + dst = 1 - np.exp(- RECOVERY_RATIO * dst) + # It is assumed that after 48 h without rain the potential emission is equal to one + dst[dst >= (1 - np.exp(- RECOVERY_RATIO * 48))] = 1. + # Creates the GeoDataFrame + df = gpd.GeoDataFrame(dst.T, geometry=precipitation.geometry) + df.columns = ['PR_{0}'.format(x) for x in df.columns.values[:-1]] + ['geometry'] + + df.loc[:, 'REC'] = df.index + + self.logger.write_time_log('TrafficSector', 'calculate_precipitation_factor', + timeit.default_timer() - spent_time) + return df + + def update_fleet_value(self, df): + spent_time = timeit.default_timer() + + # Calculating fleet value by fleet class + df.loc[:, 'Fleet_value'] = df['Fleet_value'] * df['aadt'] + + df.loc[df['Fleet_Class'] == 'light_veh', 'Fleet_value'] = df['PcLight'] * df['Fleet_value'] + df.loc[df['Fleet_Class'] == 'heavy_veh', 'Fleet_value'] = df['PcHeavy'] * df['Fleet_value'] + df.loc[df['Fleet_Class'] == 'motos', 'Fleet_value'] = df['PcMoto'] * df['Fleet_value'] + df.loc[df['Fleet_Class'] == 'mopeds', 'Fleet_value'] = df['PcMoped'] * df['Fleet_value'] + + for link_id, aux_df in df.groupby('Link_ID'): + aadt = round(aux_df['aadt'].min(), 1) + fleet_value = round(aux_df['Fleet_value'].sum(), 1) + if aadt != fleet_value: + self.logger.write_log('link_ID: {0} aadt: {1} sum_fleet: {2}'.format(link_id, aadt, fleet_value), + message_level=2) + + # Drop 0 values + df = df[df['Fleet_value'] > 0] + + # Deleting unused columns + del df['aadt'], df['PcLight'], df['PcHeavy'], df['PcMoto'], df['PcMoped'], df['Fleet_Class'] + self.logger.write_time_log('TrafficSector', 'update_fleet_value', timeit.default_timer() - spent_time) + return df + + def calculate_timedelta(self, timestep_type, num_tstep, timestep_freq): + from datetime import timedelta + spent_time = timeit.default_timer() + + delta = timedelta(hours=timestep_freq * num_tstep) + + self.logger.write_time_log('TrafficSector', 'calculate_timedelta', timeit.default_timer() - spent_time) + return pd.Timedelta(delta) + + def calculate_hourly_speed(self, df): + spent_time = timeit.default_timer() + + df = df.merge(self.speed_hourly, left_on='profile_id', right_on='P_speed', how='left') + df['speed'] = df.groupby('hour').apply(lambda x: x[[str(x.name)]]) + + self.logger.write_time_log('TrafficSector', 'calculate_hourly_speed', timeit.default_timer() - spent_time) + return df['speed'] * df['speed_mean'] + + def calculate_temporal_factor(self, df): + spent_time = timeit.default_timer() + + def 
get_hourly_id_from_weekday(weekday): + if weekday <= 4: + return 'aadt_h_wd' + elif weekday == 5: + return 'aadt_h_sat' + elif weekday == 6: + return 'aadt_h_sun' + else: + print 'ERROR: Weekday not found' + exit() + + # Monthly factor + df = df.merge(self.monthly_profiles.reset_index(), left_on='aadt_m_mn', right_on='P_month', how='left') + df['MF'] = df.groupby('month').apply(lambda x: x[[x.name]]) + df.drop(columns=range(1, 12 + 1), inplace=True) + + # Daily factor + df = df.merge(self.weekly_profiles.reset_index(), left_on='aadt_week', right_on='P_week', how='left') + + df['WF'] = df.groupby('week_day').apply(lambda x: x[[x.name]]) + df.drop(columns=range(0, 7), inplace=True) + + # Hourly factor + df['hourly_profile'] = df.groupby('week_day').apply(lambda x: x[[get_hourly_id_from_weekday(x.name)]]) + df.loc[df['hourly_profile'] == '', 'hourly_profile'] = df['aadt_h_mn'] + + df['hourly_profile'] = df['hourly_profile'].astype(str) + self.hourly_profiles['P_hour'] = self.hourly_profiles['P_hour'].astype(str) + + df = df.merge(self.hourly_profiles, left_on='hourly_profile', right_on='P_hour', how='left') + df['HF'] = df.groupby('hour').apply(lambda x: x[[str(x.name)]]) + + self.logger.write_time_log('TrafficSector', 'calculate_temporal_factor', timeit.default_timer() - spent_time) + return df['MF'] * df['WF'] * df['HF'] + + def calculate_time_dependent_values(self, df, timestep_type, timestep_num, timestep_freq): + spent_time = timeit.default_timer() + + df.reset_index(inplace=True) + for tstep in xrange(timestep_num): + # Finding weekday + # 0 -> Monday; 6 -> Sunday + df.loc[:, 'month'] = (df['start_date'] + self.calculate_timedelta( + timestep_type, tstep, timestep_freq)).dt.month + df.loc[:, 'week_day'] = (df['start_date'] + self.calculate_timedelta( + timestep_type, tstep, timestep_freq)).dt.weekday + df.loc[:, 'hour'] = (df['start_date'] + self.calculate_timedelta( + timestep_type, tstep, timestep_freq)).dt.hour + + # Selecting speed_mean + df.loc[df['week_day'] <= 4, 'speed_mean'] = df['sp_wd'] + df.loc[df['week_day'] > 4, 'speed_mean'] = df['sp_we'] + + # Selecting speed profile_id + df.loc[df['week_day'] == 0, 'profile_id'] = df['sp_hour_mo'] + df.loc[df['week_day'] == 1, 'profile_id'] = df['sp_hour_tu'] + df.loc[df['week_day'] == 2, 'profile_id'] = df['sp_hour_we'] + df.loc[df['week_day'] == 3, 'profile_id'] = df['sp_hour_th'] + df.loc[df['week_day'] == 4, 'profile_id'] = df['sp_hour_fr'] + df.loc[df['week_day'] == 5, 'profile_id'] = df['sp_hour_sa'] + df.loc[df['week_day'] == 6, 'profile_id'] = df['sp_hour_su'] + + df['profile_id'] = df['profile_id'].astype(int) + + # Selecting flat profile for 0 and nan's + df.loc[df['profile_id'] == 0, 'profile_id'] = 1 + df.loc[df['profile_id'] == np.nan, 'profile_id'] = 1 + + # Calculating speed by tstep + speed_column_name = 'v_{0}'.format(tstep) + df[speed_column_name] = self.calculate_hourly_speed(df.loc[:, ['hour', 'speed_mean', 'profile_id']]) + + factor_column_name = 'f_{0}'.format(tstep) + + df.loc[:, factor_column_name] = self.calculate_temporal_factor( + df.loc[:, ['month', 'week_day', 'hour', 'aadt_m_mn', 'aadt_week', 'aadt_h_mn', 'aadt_h_wd', + 'aadt_h_sat', 'aadt_h_sun']]) + + # Deleting time variables + + del df['month'], df['week_day'], df['hour'], df['profile_id'], df['speed_mean'] + del df['sp_wd'], df['sp_we'], df['index'] + del df['sp_hour_mo'], df['sp_hour_tu'], df['sp_hour_we'], df['sp_hour_th'], df['sp_hour_fr'] + del df['sp_hour_sa'], df['sp_hour_su'] + del df['aadt_m_mn'], df['aadt_h_mn'], df['aadt_h_wd'], 
df['aadt_h_sat'], df['aadt_h_sun'], df['aadt_week'] + del df['start_date'] + + self.logger.write_time_log('TrafficSector', 'calculate_time_dependent_values', + timeit.default_timer() - spent_time) + + return df + + def expand_road_links(self, timestep_type, timestep_num, timestep_freq): + spent_time = timeit.default_timer() + + # Expands each road link by any vehicle type that the selected road link has. + df_list = [] + road_link_aux = self.road_links.copy() + + del road_link_aux['geometry'] + for zone, compo_df in road_link_aux.groupby('fleet_comp'): + fleet = self.find_fleet(zone) + df_aux = pd.merge(compo_df, fleet, how='left', on='fleet_comp') + df_list.append(df_aux) + + df = pd.concat(df_list, ignore_index=True) + libc.malloc_trim(0) + + del df['fleet_comp'] + + df = self.update_fleet_value(df) + df = self.calculate_time_dependent_values(df, timestep_type, timestep_num, timestep_freq) + + self.logger.write_time_log('TrafficSector', 'expand_road_links', timeit.default_timer() - spent_time) + + return df + + def find_fleet(self, zone): + spent_time = timeit.default_timer() + + try: + fleet = self.fleet_compo[['Code', 'Class', zone]] + except KeyError as e: + raise KeyError(e.message + ' of the fleet_compo file') + fleet.columns = ['Fleet_Code', 'Fleet_Class', 'Fleet_value'] + + fleet = fleet[fleet['Fleet_value'] > 0] + + fleet['fleet_comp'] = zone + + self.logger.write_time_log('TrafficSector', 'find_fleet', timeit.default_timer() - spent_time) + + return fleet + + def calculate_hot(self): + spent_time = timeit.default_timer() + + expanded_aux = self.expanded.copy().reset_index() + + for pollutant in self.source_pollutants: + if pollutant != 'nh3': + + ef_code_slope_road, ef_code_slope, ef_code_road, ef_code = self.read_ef('hot', pollutant) + df_code_slope_road = expanded_aux.merge( + ef_code_slope_road, left_on=['Fleet_Code', 'road_grad', 'Road_type'], + right_on=['Code', 'Road.Slope', 'Mode'], how='inner') + df_code_slope = expanded_aux.merge(ef_code_slope, left_on=['Fleet_Code', 'road_grad'], + right_on=['Code', 'Road.Slope'], how='inner') + df_code_road = expanded_aux.merge(ef_code_road, left_on=['Fleet_Code', 'Road_type'], + right_on=['Code', 'Mode'], how='inner') + df_code = expanded_aux.merge(ef_code, left_on=['Fleet_Code'], right_on=['Code'], how='inner') + + del ef_code_slope_road, ef_code_slope, ef_code_road, ef_code + + expanded_aux = pd.concat([df_code_slope_road, df_code_slope, df_code_road, df_code]) + + expanded_aux.drop(columns=['Code', 'Road.Slope', 'Mode'], inplace=True) + else: + ef_code_road = self.read_ef('hot', pollutant) + expanded_aux = expanded_aux.merge(ef_code_road, left_on=['Fleet_Code', 'Road_type'], + right_on=['Code', 'Mode'], how='inner') + + del expanded_aux['Code'], expanded_aux['Mode'] + + # Warnings and Errors + original_ef_profile = self.expanded['Fleet_Code'].unique() + calculated_ef_profiles = expanded_aux['Fleet_Code'].unique() + resta_1 = [item for item in original_ef_profile if item not in calculated_ef_profiles] # Warining + resta_2 = [item for item in calculated_ef_profiles if item not in original_ef_profile] # Error + + if len(resta_1) > 0: + self.logger.write_log('WARNING! 
Exists some fleet codes that not appear on the EF file: {0}'.format( + resta_1)) + warnings.warn('Exists some fleet codes that not appear on the EF file: {0}'.format(resta_1), Warning) + if len(resta_2) > 0: + raise ImportError('Exists some fleet codes duplicateds on the EF file: {0}'.format(resta_2)) + + m_corr = self.read_mcorr_file(pollutant) + if m_corr is not None: + expanded_aux = expanded_aux.merge(m_corr, left_on='Fleet_Code', right_on='Code', how='left') + del expanded_aux['Code'] + + for tstep in xrange(self.timestep_num): + ef_name = 'ef_{0}_{1}'.format(pollutant, tstep) + p_column = '{0}_{1}'.format(pollutant, tstep) + if pollutant != 'nh3': + expanded_aux['v_aux'] = expanded_aux['v_{0}'.format(tstep)] + expanded_aux.loc[expanded_aux['v_aux'] < expanded_aux['Min.Speed'], 'v_aux'] = expanded_aux.loc[ + expanded_aux['v_aux'] < expanded_aux['Min.Speed'], 'Min.Speed'] + expanded_aux.loc[expanded_aux['v_aux'] > expanded_aux['Max.Speed'], 'v_aux'] = expanded_aux.loc[ + expanded_aux['v_aux'] > expanded_aux['Max.Speed'], 'Max.Speed'] + + # EF + expanded_aux.loc[:, ef_name] = \ + ((expanded_aux.Alpha * expanded_aux.v_aux**2 + expanded_aux.Beta * expanded_aux.v_aux + + expanded_aux.Gamma + (expanded_aux.Delta / expanded_aux.v_aux)) / + (expanded_aux.Epsilon * expanded_aux.v_aux**2 + expanded_aux.Zita * expanded_aux.v_aux + + expanded_aux.Hta)) * (1 - expanded_aux.RF) * \ + (expanded_aux.PF * expanded_aux['T'] / expanded_aux.Q) + else: + expanded_aux.loc[:, ef_name] = \ + ((expanded_aux['a'] * expanded_aux['Cmileage'] + expanded_aux['b']) * + (expanded_aux['EFbase'] * expanded_aux['TF'])) / 1000 + + # Mcorr + if m_corr is not None: + expanded_aux.loc[expanded_aux['v_aux'] <= 19., 'Mcorr'] = \ + expanded_aux.A_urban * expanded_aux['M'] + expanded_aux.B_urban + expanded_aux.loc[expanded_aux['v_aux'] >= 63., 'Mcorr'] = \ + expanded_aux.A_road * expanded_aux['M'] + expanded_aux.B_road + expanded_aux.loc[(expanded_aux['v_aux'] > 19.) & (expanded_aux['v_aux'] < 63.), 'Mcorr'] = \ + (expanded_aux.A_urban * expanded_aux['M'] + expanded_aux.B_urban) + \ + ((expanded_aux.v_aux - 19) * + ((expanded_aux.A_road * expanded_aux['M'] + expanded_aux.B_road) - + (expanded_aux.A_urban * expanded_aux['M'] + expanded_aux.B_urban))) / 44. 
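+ # Illustrative reading of the mileage correction just computed: below 19 km/h the
+ # urban coefficients (A_urban, B_urban) apply, above 63 km/h the road coefficients
+ # (A_road, B_road) apply, and in between Mcorr is interpolated linearly over the
+ # 44 km/h window (63 - 19). E.g. at the midpoint v_aux = 41 km/h the correction is
+ # the arithmetic mean of the urban and road corrections for the same mileage M.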
+ expanded_aux.loc[expanded_aux['Mcorr'].isnull(), 'Mcorr'] = 1 + else: + expanded_aux.loc[:, 'Mcorr'] = 1 + + # Full formula + expanded_aux.loc[:, p_column] = \ + expanded_aux['Fleet_value'] * expanded_aux[ef_name] * expanded_aux['Mcorr'] * \ + expanded_aux['f_{0}'.format(tstep)] + del expanded_aux[ef_name], expanded_aux['Mcorr'] + + if pollutant != 'nh3': + del expanded_aux['v_aux'] + del expanded_aux['Min.Speed'], expanded_aux['Max.Speed'], expanded_aux['Alpha'], expanded_aux['Beta'] + del expanded_aux['Gamma'], expanded_aux['Delta'], expanded_aux['Epsilon'], expanded_aux['Zita'] + del expanded_aux['Hta'], expanded_aux['RF'], expanded_aux['Q'], expanded_aux['PF'], expanded_aux['T'] + else: + del expanded_aux['a'], expanded_aux['Cmileage'], expanded_aux['b'], expanded_aux['EFbase'] + del expanded_aux['TF'] + + if m_corr is not None: + del expanded_aux['A_urban'], expanded_aux['B_urban'], expanded_aux['A_road'], expanded_aux['B_road'] + del expanded_aux['M'] + + del expanded_aux['road_grad'] + + for tstep in xrange(self.timestep_num): + del expanded_aux['f_{0}'.format(tstep)] + + self.logger.write_time_log('TrafficSector', 'calculate_hot', timeit.default_timer() - spent_time) + return expanded_aux + + def calculate_cold(self, hot_expanded): + spent_time = timeit.default_timer() + + cold_links = self.road_links.copy() + + del cold_links['aadt'], cold_links['PcHeavy'], cold_links['PcMoto'], cold_links['PcMoped'], cold_links['sp_wd'] + del cold_links['sp_we'], cold_links['sp_hour_su'], cold_links['sp_hour_mo'], cold_links['sp_hour_tu'] + del cold_links['sp_hour_we'], cold_links['sp_hour_th'], cold_links['sp_hour_fr'], cold_links['sp_hour_sa'] + del cold_links['Road_type'], cold_links['aadt_m_mn'], cold_links['aadt_h_mn'], cold_links['aadt_h_wd'] + del cold_links['aadt_h_sat'], cold_links['aadt_h_sun'], cold_links['aadt_week'], cold_links['fleet_comp'] + del cold_links['road_grad'], cold_links['PcLight'], cold_links['start_date'] + libc.malloc_trim(0) + + cold_links.loc[:, 'centroid'] = cold_links['geometry'].centroid + link_lons = cold_links['geometry'].centroid.x + link_lats = cold_links['geometry'].centroid.y + + temperature = IoNetcdf(self.comm).get_hourly_data_from_netcdf( + link_lons.min(), link_lons.max(), link_lats.min(), link_lats.max(), self.temp_common_path, 'tas', + self.date_array) + temperature.rename(columns={x: 't_{0}'.format(x) for x in xrange(len(self.date_array))}, inplace=True) + # From Kelvin to Celsius degrees + temperature.loc[:, ['t_{0}'.format(x) for x in xrange(len(self.date_array))]] = \ + temperature.loc[:, ['t_{0}'.format(x) for x in xrange(len(self.date_array))]] - 273.15 + + unary_union = temperature.unary_union + cold_links['REC'] = cold_links.apply(self.nearest, geom_union=unary_union, df1=cold_links, df2=temperature, + geom1_col='centroid', src_column='REC', axis=1) + del cold_links['geometry'], cold_links['centroid'], temperature['geometry'] + libc.malloc_trim(0) + + cold_links = cold_links.merge(temperature, left_on='REC', right_on='REC', how='left') + del cold_links['REC'] + libc.malloc_trim(0) + + c_expanded = hot_expanded.merge(cold_links, left_on='Link_ID', right_on='Link_ID', how='left') + + df_list = [] + for pollutant in self.source_pollutants: + + ef_cold = self.read_ef('cold', pollutant) + + if pollutant != 'nh3': + ef_cold.loc[ef_cold['Tmin'].isnull(), 'Tmin'] = -999 + ef_cold.loc[ef_cold['Tmax'].isnull(), 'Tmax'] = 999 + ef_cold.loc[ef_cold['Min.Speed'].isnull(), 'Min.Speed'] = -999 + ef_cold.loc[ef_cold['Max.Speed'].isnull(), 'Max.Speed'] = 999 
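+ # Missing temperature or speed bounds in the cold EF file are replaced with wide
+ # sentinels (-999 / 999) so that the per-timestep filters applied below
+ # (Tmin <= t < Tmax, Min.Speed <= v < Max.Speed) keep every row whose bound was
+ # left empty instead of silently dropping it.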
+ + c_expanded_p = c_expanded.merge(ef_cold, left_on=['Fleet_Code', 'Road_type'], + right_on=['Code', 'Mode'], how='inner') + cold_exp_p_aux = c_expanded_p.copy() + + del cold_exp_p_aux['index_right_x'], cold_exp_p_aux['Road_type'], cold_exp_p_aux['Fleet_value'] + del cold_exp_p_aux['Code'] + libc.malloc_trim(0) + + for tstep in xrange(self.timestep_num): + v_column = 'v_{0}'.format(tstep) + p_column = '{0}_{1}'.format(pollutant, tstep) + t_column = 't_{0}'.format(tstep) + if pollutant != 'nh3': + cold_exp_p_aux = cold_exp_p_aux.loc[cold_exp_p_aux[t_column] >= cold_exp_p_aux['Tmin'], :] + cold_exp_p_aux = cold_exp_p_aux.loc[cold_exp_p_aux[t_column] < cold_exp_p_aux['Tmax'], :] + cold_exp_p_aux = cold_exp_p_aux.loc[cold_exp_p_aux[v_column] >= cold_exp_p_aux['Min.Speed'], :] + cold_exp_p_aux = cold_exp_p_aux.loc[cold_exp_p_aux[v_column] < cold_exp_p_aux['Max.Speed'], :] + + # Beta + cold_exp_p_aux.loc[:, 'Beta'] = \ + (0.6474 - (0.02545 * cold_exp_p_aux['ltrip']) - (0.00974 - (0.000385 * cold_exp_p_aux['ltrip'])) * + cold_exp_p_aux[t_column]) * cold_exp_p_aux['bc'] + if pollutant != 'nh3': + cold_exp_p_aux.loc[:, 'cold_hot'] = \ + cold_exp_p_aux['A'] * cold_exp_p_aux[v_column] + cold_exp_p_aux['B'] * \ + cold_exp_p_aux[t_column] + cold_exp_p_aux['C'] + + else: + cold_exp_p_aux.loc[:, 'cold_hot'] = \ + ((cold_exp_p_aux['a'] * cold_exp_p_aux['Cmileage'] + cold_exp_p_aux['b']) * + cold_exp_p_aux['EFbase'] * cold_exp_p_aux['TF']) / \ + ((cold_exp_p_aux['a_hot'] * cold_exp_p_aux['Cmileage'] + cold_exp_p_aux['b_hot']) * + cold_exp_p_aux['EFbase_hot'] * cold_exp_p_aux['TF_hot']) + cold_exp_p_aux.loc[cold_exp_p_aux['cold_hot'] < 1, 'cold_hot'] = 1 + + # Formula Cold emissions + cold_exp_p_aux.loc[:, p_column] = \ + cold_exp_p_aux[p_column] * cold_exp_p_aux['Beta'] * (cold_exp_p_aux['cold_hot'] - 1) + df_list.append((cold_exp_p_aux.loc[:, ['Link_ID', 'Fleet_Code', p_column]]).set_index( + ['Link_ID', 'Fleet_Code'])) + + try: + cold_df = pd.concat(df_list, axis=1, ).reset_index() + except Exception: + error_fleet_code = [] + for df in df_list: + orig = list(df.index.values) + uni = list(np.unique(df.index.values)) + + for o in orig: + try: + uni.remove(o) + except Exception: + error_fleet_code.append(o) + raise IndexError('There are duplicated values for {0} codes in the cold EF files.'.format(error_fleet_code)) + + for tstep in xrange(self.timestep_num): + if 'pm' in self.source_pollutants: + cold_df.loc[:, 'pm10_{0}'.format(tstep)] = cold_df['pm_{0}'.format(tstep)] + cold_df.loc[:, 'pm25_{0}'.format(tstep)] = cold_df['pm_{0}'.format(tstep)] + del cold_df['pm_{0}'.format(tstep)] + libc.malloc_trim(0) + if 'voc' in self.source_pollutants and 'ch4' in self.source_pollutants: + cold_df.loc[:, 'nmvoc_{0}'.format(tstep)] = \ + cold_df['voc_{0}'.format(tstep)] - cold_df['ch4_{0}'.format(tstep)] + del cold_df['voc_{0}'.format(tstep)] + libc.malloc_trim(0) + else: + self.logger.write_log("WARNING! 
nmvoc emissions cannot be estimated because voc or ch4 are not " + + "selected in the pollutant list.") + warnings.warn("nmvoc emissions cannot be estimated because voc or ch4 are not selected in the " + + "pollutant list.") + + cold_df = self.speciate_traffic(cold_df, self.hot_cold_speciation) + libc.malloc_trim(0) + self.logger.write_time_log('TrafficSector', 'calculate_cold', timeit.default_timer() - spent_time) + return cold_df + + def compact_hot_expanded(self, expanded): + spent_time = timeit.default_timer() + + columns_to_delete = ['Road_type', 'Fleet_value'] + ['v_{0}'.format(x) for x in xrange(self.timestep_num)] + for column_name in columns_to_delete: + del expanded[column_name] + + for tstep in xrange(self.timestep_num): + if 'pm' in self.source_pollutants: + expanded.loc[:, 'pm10_{0}'.format(tstep)] = expanded['pm_{0}'.format(tstep)] + expanded.loc[:, 'pm25_{0}'.format(tstep)] = expanded['pm_{0}'.format(tstep)] + del expanded['pm_{0}'.format(tstep)] + + if 'voc' in self.source_pollutants and 'ch4' in self.source_pollutants: + expanded.loc[:, 'nmvoc_{0}'.format(tstep)] = expanded['voc_{0}'.format(tstep)] - \ + expanded['ch4_{0}'.format(tstep)] + del expanded['voc_{0}'.format(tstep)] + else: + self.logger.write_log("nmvoc emissions cannot be estimated because voc or ch4 are not selected in " + + "the pollutant list.") + warnings.warn( + "nmvoc emissions cannot be estimated because voc or ch4 are not selected in the pollutant list.") + + compacted = self.speciate_traffic(expanded, self.hot_cold_speciation) + + self.logger.write_time_log('TrafficSector', 'compact_hot_expanded', timeit.default_timer() - spent_time) + return compacted + + def calculate_tyre_wear(self): + spent_time = timeit.default_timer() + + pollutants = ['pm'] + for pollutant in pollutants: + ef_tyre = self.read_ef('tyre', pollutant) + df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') + del df['road_grad'], df['Road_type'], df['Code'] + for tstep in xrange(self.timestep_num): + p_column = '{0}_{1}'.format(pollutant, tstep) + f_column = 'f_{0}'.format(tstep) + v_column = 'v_{0}'.format(tstep) + df.loc[df[v_column] < 40, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] * 1.39 + df.loc[(df[v_column] >= 40) & (df[v_column] <= 90), p_column] = \ + df['Fleet_value'] * df['EFbase'] * df[f_column] * (-0.00974 * df[v_column] + 1.78) + df.loc[df[v_column] > 90, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] * 0.902 + + # from PM to PM10 & PM2.5 + if pollutant == 'pm': + df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] * 0.6 + df.loc[:, 'pm25_{0}'.format(tstep)] = df[p_column] * 0.42 + del df[p_column] + + # Cleaning df + columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in xrange( + self.timestep_num)] + columns_to_delete += ['Fleet_value', 'EFbase'] + for column in columns_to_delete: + del df[column] + df = self.speciate_traffic(df, self.tyre_speciation) + + self.logger.write_time_log('TrafficSector', 'calculate_tyre_wear', timeit.default_timer() - spent_time) + return df + + def calculate_brake_wear(self): + spent_time = timeit.default_timer() + + pollutants = ['pm'] + for pollutant in pollutants: + ef_tyre = self.read_ef('brake', pollutant) + df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') + del df['road_grad'], df['Road_type'], df['Code'] + for tstep in xrange(self.timestep_num): + p_column = '{0}_{1}'.format(pollutant, tstep) + f_column = 'f_{0}'.format(tstep) + 
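+ # The brake-wear emission computed below is Fleet_value * EFbase * f_<tstep>
+ # times a speed correction: 1.67 under 40 km/h, a linear ramp (-0.027 * v + 2.75)
+ # between 40 and 95 km/h, and 0.185 above 95 km/h; the resulting PM is then split
+ # into PM10 (0.98) and PM2.5 (0.39) fractions.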
v_column = 'v_{0}'.format(tstep) + df.loc[df[v_column] < 40, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] * 1.67 + df.loc[(df[v_column] >= 40) & (df[v_column] <= 95), p_column] = \ + df['Fleet_value'] * df['EFbase'] * df[f_column] * (-0.027 * df[v_column] + 2.75) + df.loc[df[v_column] > 95, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] * 0.185 + + # from PM to PM10 & PM2.5 + if pollutant == 'pm': + df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] * 0.98 + df.loc[:, 'pm25_{0}'.format(tstep)] = df[p_column] * 0.39 + del df[p_column] + + # Cleaning df + columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in xrange( + self.timestep_num)] + columns_to_delete += ['Fleet_value', 'EFbase'] + for column in columns_to_delete: + del df[column] + + df = self.speciate_traffic(df, self.brake_speciation) + + self.logger.write_time_log('TrafficSector', 'calculate_brake_wear', timeit.default_timer() - spent_time) + return df + + def calculate_road_wear(self): + spent_time = timeit.default_timer() + + pollutants = ['pm'] + for pollutant in pollutants: + ef_tyre = self.read_ef('road', pollutant) + df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') + del df['road_grad'], df['Road_type'], df['Code'] + for tstep in xrange(self.timestep_num): + p_column = '{0}_{1}'.format(pollutant, tstep) + f_column = 'f_{0}'.format(tstep) + df.loc[:, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] + + # from PM to PM10 & PM2.5 + if pollutant == 'pm': + df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] * 0.5 + df.loc[:, 'pm25_{0}'.format(tstep)] = df[p_column] * 0.27 + del df[p_column] + + # Cleaning df + columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in xrange( + self.timestep_num)] + columns_to_delete += ['Fleet_value', 'EFbase'] + for column in columns_to_delete: + del df[column] + + df = self.speciate_traffic(df, self.road_speciation) + + self.logger.write_time_log('TrafficSector', 'calculate_road_wear', timeit.default_timer() - spent_time) + return df + + def calculate_resuspension(self): + spent_time = timeit.default_timer() + + if self.resuspension_correction: + road_link_aux = self.road_links.loc[:, ['Link_ID', 'geometry']].copy() + + road_link_aux.loc[:, 'centroid'] = road_link_aux['geometry'].centroid + link_lons = road_link_aux['geometry'].centroid.x + link_lats = road_link_aux['geometry'].centroid.y + + p_factor = self.calculate_precipitation_factor(link_lons.min(), link_lons.max(), link_lats.min(), + link_lats.max(), self.precipitation_path) + + unary_union = p_factor.unary_union + road_link_aux['REC'] = road_link_aux.apply(self.nearest, geom_union=unary_union, df1=road_link_aux, + df2=p_factor, geom1_col='centroid', src_column='REC', axis=1) + del road_link_aux['centroid'], p_factor['geometry'] + + road_link_aux = road_link_aux.merge(p_factor, left_on='REC', right_on='REC', how='left') + + del road_link_aux['REC'] + + pollutants = ['pm'] + for pollutant in pollutants: + ef_tyre = self.read_ef('resuspension', pollutant) + df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') + if self.resuspension_correction: + df = df.merge(road_link_aux, left_on='Link_ID', right_on='Link_ID', how='left') + + del df['road_grad'], df['Road_type'], df['Code'] + for tstep in xrange(self.timestep_num): + p_column = '{0}_{1}'.format(pollutant, tstep) + f_column = 'f_{0}'.format(tstep) + if self.resuspension_correction: + 
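+ # PR_<tstep> comes from calculate_precipitation_factor: the emission potential is
+ # 1 - exp(-RECOVERY_RATIO * consecutive_dry_hours), computed from the prlr field
+ # (hours with rain below MIN_RAIN mm/h count as dry) and capped at 1 after roughly
+ # 48 h without rain, so resuspension is damped right after precipitation events.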
pr_column = 'PR_{0}'.format(tstep) + df.loc[:, p_column] = df['Fleet_value'] * df['EFbase'] * df[pr_column] * df[f_column] + else: + df.loc[:, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] + + # from PM to PM10 & PM2.5 + if pollutant == 'pm': + df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] + # TODO Check fraction of pm2.5 + df.loc[:, 'pm25_{0}'.format(tstep)] = df[p_column] * 0.5 + del df[p_column] + + # Cleaning df + columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in + xrange(self.timestep_num)] + columns_to_delete += ['Fleet_value', 'EFbase'] + for column in columns_to_delete: + del df[column] + + df = self.speciate_traffic(df, self.resuspension_speciation) + + self.logger.write_time_log('TrafficSector', 'calculate_resuspension', timeit.default_timer() - spent_time) + return df + + def transform_df(self, df): + spent_time = timeit.default_timer() + + df_list = [] + for tstep in xrange(self.timestep_num): + pollutants_to_rename = [p for p in list(df.columns.values) if p.endswith('_{0}'.format(tstep))] + pollutants_renamed = [] + for p_name in pollutants_to_rename: + p_name_new = p_name.replace('_{0}'.format(tstep), '') + df.rename(columns={p_name: p_name_new}, inplace=True) + pollutants_renamed.append(p_name_new) + + df_aux = pd.DataFrame(df.loc[:, ['Link_ID', 'Fleet_Code'] + pollutants_renamed]) + df_aux['tstep'] = tstep + + df_list.append(df_aux) + df.drop(columns=pollutants_renamed, inplace=True) + + df = pd.concat(df_list, ignore_index=True) + self.logger.write_time_log('TrafficSector', 'transform_df', timeit.default_timer() - spent_time) + return df + + def speciate_traffic(self, df, speciation): + spent_time = timeit.default_timer() + + # Reads speciation profile + speciation = self.read_profiles(speciation) + + del speciation['Copert_V_name'] + + # Transform dataset into timestep rows instead of timestep columns + df = self.transform_df(df) + + in_list = list(df.columns.values) + + in_columns = ['Link_ID', 'Fleet_Code', 'tstep'] + for in_col in in_columns: + in_list.remove(in_col) + + df_out_list = [] + + # PMC + if not set(speciation.columns.values).isdisjoint(pmc_list): + out_p = set(speciation.columns.values).intersection(pmc_list).pop() + speciation_by_in_p = speciation.loc[:, [out_p] + ['Code']] + + speciation_by_in_p.rename(columns={out_p: 'f_{0}'.format(out_p)}, inplace=True) + df_aux = df.loc[:, ['pm10', 'pm25', 'Fleet_Code', 'tstep', 'Link_ID']] + df_aux = df_aux.merge(speciation_by_in_p, left_on='Fleet_Code', right_on='Code', how='left') + df_aux.drop(columns=['Code'], inplace=True) + + df_aux.loc[:, out_p] = df_aux['pm10'] - df_aux['pm25'] + + df_out_list.append(df_aux.loc[:, [out_p] + ['tstep', 'Link_ID']].groupby(['tstep', 'Link_ID']).sum()) + del df_aux[out_p] + for in_p in in_list: + involved_out_pollutants = [key for key, value in self.speciation_map.iteritems() if value == in_p] + + # Selecting only necessary speciation profiles + speciation_by_in_p = speciation.loc[:, involved_out_pollutants + ['Code']] + + # Adding "f_" in the formula column names + for p in involved_out_pollutants: + speciation_by_in_p.rename(columns={p: 'f_{0}'.format(p)}, inplace=True) + # Getting a slice of the full dataset to be merged + df_aux = df.loc[:, [in_p] + ['Fleet_Code', 'tstep', 'Link_ID']] + df_aux = df_aux.merge(speciation_by_in_p, left_on='Fleet_Code', right_on='Code', how='left') + df_aux.drop(columns=['Code'], inplace=True) + + # Renaming pollutant columns by adding "old_" to the beginning. 
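+ # Each output species p mapped to in_p is then obtained as
+ #     df_aux[p] = old_<in_p> * f_<p> / mol_w
+ # where f_<p> is the per-vehicle-code split factor from the speciation profile.
+ # Following the example map shown in read_speciation_map, an output NOx column
+ # would be old_nox_no2 * f_NOx / MW(nox_no2); nmvoc keeps mass units (mol_w = 1),
+ # so it stays in g/km.h while gas-phase species become mol/km.h.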
+ df_aux.rename(columns={in_p: 'old_{0}'.format(in_p)}, inplace=True) + for p in involved_out_pollutants: + if in_p is not np.nan: + if in_p != 0: + df_aux.loc[:, p] = df_aux['old_{0}'.format(in_p)].multiply(df_aux['f_{0}'.format(p)]) + try: + if in_p == 'nmvoc': + mol_w = 1.0 + else: + mol_w = self.molecular_weights[in_p] + except KeyError: + raise AttributeError('{0} not found in the molecular weights file.'.format(in_p)) + # from g/km.h to mol/km.h or g/km.h (aerosols) + df_aux.loc[:, p] = df_aux.loc[:, p] / mol_w + + else: + df_aux.loc[:, p] = 0 + + df_out_list.append(df_aux.loc[:, [p] + ['tstep', 'Link_ID']].groupby(['tstep', 'Link_ID']).sum()) + del df_aux[p] + del df_aux + del df[in_p] + + df_out = pd.concat(df_out_list, axis=1) + + self.logger.write_time_log('TrafficSector', 'speciate_traffic', timeit.default_timer() - spent_time) + return df_out + + def calculate_emissions(self): + spent_time = timeit.default_timer() + version = 1 + self.logger.write_log('\tCalculating Road traffic emissions', message_level=1) + df_accum = pd.DataFrame() + + if version == 2: + if self.do_hot: + self.logger.write_log('\t\tCalculating Hot emissions.', message_level=2) + df_accum = pd.concat([df_accum, self.compact_hot_expanded(self.calculate_hot())]).groupby( + ['tstep', 'Link_ID']).sum() + if self.do_cold: + self.logger.write_log('\t\tCalculating Cold emissions.', message_level=2) + df_accum = pd.concat([df_accum, self.calculate_cold(self.calculate_hot())]).groupby( + ['tstep', 'Link_ID']).sum() + else: + if self.do_hot or self.do_cold: + self.logger.write_log('\t\tCalculating Hot emissions.', message_level=2) + hot_emis = self.calculate_hot() + + if self.do_hot: + self.logger.write_log('\t\tCompacting Hot emissions.', message_level=2) + df_accum = pd.concat([df_accum, self.compact_hot_expanded(hot_emis.copy())]).groupby( + ['tstep', 'Link_ID']).sum() + libc.malloc_trim(0) + if self.do_cold: + self.logger.write_log('\t\tCalculating Cold emissions.', message_level=2) + df_accum = pd.concat([df_accum, self.calculate_cold(hot_emis)]).groupby( + ['tstep', 'Link_ID']).sum() + libc.malloc_trim(0) + if self.do_hot or self.do_cold: + del hot_emis + libc.malloc_trim(0) + + if self.do_tyre_wear: + self.logger.write_log('\t\tCalculating Tyre wear emissions.', message_level=2) + df_accum = pd.concat([df_accum, self.calculate_tyre_wear()]).groupby(['tstep', 'Link_ID']).sum() + libc.malloc_trim(0) + if self.do_brake_wear: + self.logger.write_log('\t\tCalculating Brake wear emissions.', message_level=2) + df_accum = pd.concat([df_accum, self.calculate_brake_wear()]).groupby(['tstep', 'Link_ID']).sum() + libc.malloc_trim(0) + if self.do_road_wear: + self.logger.write_log('\t\tCalculating Road wear emissions.', message_level=2) + df_accum = pd.concat([df_accum, self.calculate_road_wear()]).groupby(['tstep', 'Link_ID']).sum() + libc.malloc_trim(0) + if self.do_resuspension: + self.logger.write_log('\t\tCalculating Resuspension emissions.', message_level=2) + df_accum = pd.concat([df_accum, self.calculate_resuspension()]).groupby(['tstep', 'Link_ID']).sum() + libc.malloc_trim(0) + df_accum = df_accum.reset_index().merge(self.road_links.loc[:, ['Link_ID', 'geometry']], left_on='Link_ID', + right_on='Link_ID', how='left') + df_accum = gpd.GeoDataFrame(df_accum, crs=self.crs) + libc.malloc_trim(0) + df_accum.set_index(['Link_ID', 'tstep'], inplace=True) + + if self.write_rline: + self.write_rline_output(df_accum.copy()) + self.logger.write_log('\t\tRoad link emissions to grid.', message_level=2) + df_accum = 
self.links_to_grid(df_accum) + libc.malloc_trim(0) + + self.logger.write_log('\tRoad traffic emissions calculated', message_level=2) + self.logger.write_time_log('TrafficSector', 'calculate_emissions', timeit.default_timer() - spent_time) + return df_accum + + def links_to_grid(self, link_emissions): + spent_time = timeit.default_timer() + + link_emissions.reset_index(inplace=True) + if not os.path.exists(self.link_to_grid_csv): + link_emissions_aux = link_emissions.loc[link_emissions['tstep'] == 0, :] + + link_emissions_aux = link_emissions_aux.to_crs(self.grid_shp.crs) + + link_emissions_aux = gpd.sjoin(link_emissions_aux, self.grid_shp.reset_index(), + how="inner", op='intersects') + + link_emissions_aux = link_emissions_aux.loc[:, ['Link_ID', 'geometry', 'FID']] + + link_emissions_aux = link_emissions_aux.merge(self.grid_shp.reset_index().loc[:, ['FID', 'geometry']], + on='FID', how='left') + + length_list = [] + link_id_list = [] + fid_list = [] + count = 1 + for i, line in link_emissions_aux.iterrows(): + count += 1 + aux = line.get('geometry_x').intersection(line.get('geometry_y')) + if not aux.is_empty: + link_id_list.append(line.get('Link_ID')) + fid_list.append(line.get('FID')) + # Length of road links from m to km + length_list.append(aux.length / 1000) + + link_grid = pd.DataFrame({'Link_ID': link_id_list, 'FID': fid_list, 'length': length_list}) + + # Writing link to grid file + data = self.comm.gather(link_grid, root=0) + if self.comm.Get_rank() == 0: + if not os.path.exists(os.path.dirname(self.link_to_grid_csv)): + os.makedirs(os.path.dirname(self.link_to_grid_csv)) + data = pd.concat(data) + data.to_csv(self.link_to_grid_csv) + + self.comm.Barrier() + + else: + link_grid = pd.read_csv(self.link_to_grid_csv) + link_grid = link_grid[link_grid['Link_ID'].isin(link_emissions['Link_ID'].values)] + + del link_emissions['geometry'] + link_grid = link_grid.merge(link_emissions, left_on='Link_ID', right_on='Link_ID') + if 'Unnamed: 0' in link_grid.columns.values: + link_grid.drop(columns=['Unnamed: 0'], inplace=True) + + cols_to_update = list(link_grid.columns.values) + cols_to_update.remove('length') + cols_to_update.remove('tstep') + cols_to_update.remove('FID') + for col in cols_to_update: + link_grid.loc[:, col] = link_grid[col] * link_grid['length'] + del link_grid['length'] + link_grid.drop(columns=['Link_ID'], inplace=True) + link_grid['layer'] = 0 + link_grid = link_grid.groupby(['FID', 'layer', 'tstep']).sum() + + self.logger.write_time_log('TrafficSector', 'links_to_grid', timeit.default_timer() - spent_time) + + return link_grid + + def write_rline_output(self, emissions): + from datetime import timedelta + spent_time = timeit.default_timer() + + emissions.drop(columns=['geometry'], inplace=True) + for poll in emissions.columns.values: + mol_w = self.molecular_weights[self.speciation_map[poll]] + # From g/km.h to g/m.s + emissions.loc[:, poll] = emissions.loc[:, poll] * mol_w / (1000 * 3600) + + emissions.reset_index(inplace=True) + + emissions_list = self.comm.gather(emissions, root=0) + if self.comm.Get_rank() == 0: + emissions = pd.concat(emissions_list) + p_list = list(emissions.columns.values) + p_list.remove('tstep') + p_list.remove('Link_ID') + for p in p_list: + link_list = ['L_{0}'.format(x) for x in list(pd.unique(emissions['Link_ID']))] + out_df = pd.DataFrame(columns=["Year", "Mon", "Day", "JDay", "Hr"] + link_list) + for tstep, aux in emissions.loc[:, ['tstep', 'Link_ID', p]].groupby('tstep'): + aux_date = self.date_array[0] + timedelta(hours=tstep) + 
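+ # Each RLINE row is one timestep: the date fields (Year, Mon, Day, JDay, Hr) come
+ # from aux_date and the remaining columns hold one emission rate per road link
+ # (header L_<Link_ID>), already converted above to g/m.s, written to one CSV file
+ # per output pollutant.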
out_df.loc[tstep, 'Year'] = aux_date.strftime('%y') + out_df.loc[tstep, 'Mon'] = aux_date.month + out_df.loc[tstep, 'Day'] = aux_date.day + out_df.loc[tstep, 'JDay'] = aux_date.strftime('%j') + out_df.loc[tstep, 'Hr'] = aux_date.hour + out_df.loc[tstep, link_list] = aux.loc[:, [p]].transpose().values + + out_df.to_csv(os.path.join(self.output_dir, 'rline_{1}_{0}.csv'.format( + p, self.date_array[0].strftime('%Y%m%d'))), index=False) + + self.comm.Barrier() + + self.logger.write_time_log('TrafficSector', 'write_rline_output', timeit.default_timer() - spent_time) + return True + + def write_rline_roadlinks(self, df_in): + spent_time = timeit.default_timer() + + df_in_list = self.comm.gather(df_in, root=0) + if self.comm.Get_rank() == 0: + df_in = pd.concat(df_in_list) + + df_out = pd.DataFrame( + columns=['Group', 'X_b', 'Y_b', 'Z_b', 'X_e', 'Y_e', 'Z_e', 'dCL', 'sigmaz0', '#lanes', + 'lanewidth', 'Emis', 'Hw1', 'dw1', 'Hw2', 'dw2', 'Depth', 'Wtop', 'Wbottom', + 'l_bh2sw', 'l_avgbh', 'l_avgbdensity', 'l_bhdev', 'X0_af', 'X45_af', + 'X90_af', 'X135_af', 'X180_af', 'X225_af', 'X270_af', 'X315_af', 'l_maxbh', 'Link_ID']) + df_err_list = [] + + df_in = df_in.to_crs({u'units': u'm', u'no_defs': True, u'ellps': u'intl', u'proj': u'utm', u'zone': 31}) + if rline_shp: + gpd.GeoDataFrame().to_file + df_in.to_file(os.path.join(self.output_dir, 'roads.shp')) + + count = 0 + for i, line in df_in.iterrows(): + try: + df_out.loc[count] = pd.Series({ + 'Group': 'G1', + 'X_b': round(line.get('geometry').coords[0][0], 3), + 'Y_b': round(line.get('geometry').coords[0][1], 3), + 'Z_b': 1, + 'X_e': round(line.get('geometry').coords[-1][0], 3), + 'Y_e': round(line.get('geometry').coords[-1][1], 3), + 'Z_e': 1, + 'dCL': 0, + 'sigmaz0': 2, + '#lanes': 3, + 'lanewidth': 2.5, + 'Emis': 1, + 'Hw1': 0, + 'dw1': 0, + 'Hw2': 0, + 'dw2': 0, + 'Depth': 0, + 'Wtop': 0, + 'Wbottom': 0, + 'l_bh2sw': round(line.get('bh_2_sw'), 3), + 'l_avgbh': round(line.get('mean_heigh'), 3), + 'l_avgbdensity': round(line.get('area_densi'), 3), + 'l_bhdev': round(line.get('sd_height'), 3), + 'X0_af': round(line.get('af_0'), 3), + 'X45_af': round(line.get('af_45'), 3), + 'X90_af': round(line.get('af_90'), 3), + 'X135_af': round(line.get('af_135'), 3), + 'X180_af': round(line.get('af_180'), 3), + 'X225_af': round(line.get('af_225'), 3), + 'X270_af': round(line.get('af_270'), 3), + 'X315_af': round(line.get('af_315'), 3), + 'l_maxbh': round(line.get('max_height'), 3), + 'Link_ID': line.get('Link_ID'), + }) + count += 1 + except Exception: + # df_err_list.append(line) + pass + + df_out.set_index('Link_ID', inplace=True) + df_out.sort_index(inplace=True) + df_out.to_csv(os.path.join(self.output_dir, 'roads.txt'), index=False, sep=' ') + self.comm.Barrier() + self.logger.write_log('\t\tTraffic emissions calculated', message_level=2) + self.logger.write_time_log('TrafficSector', 'write_rline_roadlinks', timeit.default_timer() - spent_time) + return True diff --git a/hermesv3_bu/modules/writing/__init__.py b/hermesv3_bu/writer/__init__.py old mode 100644 new mode 100755 similarity index 100% rename from hermesv3_bu/modules/writing/__init__.py rename to hermesv3_bu/writer/__init__.py diff --git a/hermesv3_bu/writer/cmaq_writer.py b/hermesv3_bu/writer/cmaq_writer.py new file mode 100755 index 0000000000000000000000000000000000000000..b74c9d25bb80351a1718a9d8562defc0ac2166c2 --- /dev/null +++ b/hermesv3_bu/writer/cmaq_writer.py @@ -0,0 +1,313 @@ +#!/usr/bin/env python + +import numpy as np +import pandas as pd +from warnings import warn +import sys 
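+# Note: write_netcdf below opens the output file with Dataset(..., parallel=True,
+# comm=..., info=MPI.Info()), which typically requires netCDF4-python built against
+# an MPI-enabled HDF5 / netCDF-C stack (plus mpi4py); with a serial build that call
+# is expected to fail.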
+from netCDF4 import Dataset, date2num +from hermesv3_bu.writer.writer import Writer +from mpi4py import MPI +import timeit +from hermesv3_bu.logger.log import Log + + +class CmaqWriter(Writer): + def __init__(self, comm_world, comm_write, logger, netcdf_path, grid, date_array, pollutant_info, + rank_distribution, global_attributes_path, emission_summary=False): + """ + Initialise the CMAQ writer that will write a NetCDF in the CMAQ input format (IOAPIv3.2). + + :param comm_world: Global communicator for all the calculation process + :type comm_world: MPI.COMM + + :param comm_write: Sector communicator. + :type comm_write: MPI.Intracomm + + :param logger: Logger + :type logger: Log + + :param netcdf_path: Path to the output NetCDF file- + :type netcdf_path: str + + :param grid: Output grid definition. + :type grid: hermesv3_bu.grids.grid.Grid + + :param date_array: Array with each time step to be calculated. + :type date_array: list of datetime.datetime + + :param pollutant_info: Information related with the output pollutants, short description, units... + :type pollutant_info: DataFrame + + :param rank_distribution: Information of the writing process. That argument is a dictionary with the writing + process rank as key and another dictionary as value. That other dictionary contains: + - shape: Shape to write + - x_min: X minimum position to write on the full array. + - x_max: X maximum position to write on the full array. + - y_min: Y minimum position to write on the full array. + - y_max: Y maximum position to write on the full array. + - fid_min: Minimum cell ID of a flatten X Y domain. + - fid_max: Maximum cell ID of a flatten X Y domain. + + e.g. 24 time steps. 48 vertical levels, 10 x 10 + {0: {'fid_min': 0, 'y_min': 0, 'y_max': 5, 'fid_max': 50, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}, + 1: {'fid_min': 50, 'y_min': 5, 'y_max': 10, 'fid_max': 100, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}} + :type rank_distribution: dict + + :param global_attributes_path: Path to the file that contains the static global attributes. + :type global_attributes_path: str + + :param emission_summary: Indicates if you want to create the emission summary files. + :type emission_summary: bool + """ + spent_time = timeit.default_timer() + logger.write_log('CMAQ writer selected.') + + super(CmaqWriter, self).__init__(comm_world, comm_write, logger, netcdf_path, grid, date_array, pollutant_info, + rank_distribution, emission_summary) + if self.grid.grid_type not in ['Lambert Conformal Conic']: + raise TypeError("ERROR: Only Lambert Conformal Conic grid is implemented for CMAQ. " + + "The current grid type is '{0}'".format(self.grid.grid_type)) + + self.global_attributes_order = [ + 'IOAPI_VERSION', 'EXEC_ID', 'FTYPE', 'CDATE', 'CTIME', 'WDATE', 'WTIME', 'SDATE', 'STIME', 'TSTEP', 'NTHIK', + 'NCOLS', 'NROWS', 'NLAYS', 'NVARS', 'GDTYP', 'P_ALP', 'P_BET', 'P_GAM', 'XCENT', 'YCENT', 'XORIG', 'YORIG', + 'XCELL', 'YCELL', 'VGTYP', 'VGTOP', 'VGLVLS', 'GDNAM', 'UPNAM', 'FILEDESC', 'HISTORY', 'VAR-LIST'] + + self.global_attributes = self.create_global_attributes(global_attributes_path) + self.pollutant_info = self.change_pollutant_attributes() + + self.logger.write_time_log('CmaqWriter', '__init__', timeit.default_timer() - spent_time) + + def unit_change(self, emissions): + """ + Change the units from mol/h or g/h to mol/s or g/s. + + :param emissions: Emissions on dataframe. 
+ :type emissions: DataFrame + + :return: Same emissions as input + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + # From mol/h g/h to mol/s g/s + emissions = emissions / 3600.0 + + self.logger.write_time_log('CmaqWriter', 'unit_change', timeit.default_timer() - spent_time) + return emissions + + def change_pollutant_attributes(self): + """ + Modify the emission list to be consistent to use the output as input for CMAQ model. + + :return: Emission list ready for CMAQ + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + new_pollutant_info = pd.DataFrame(columns=['pollutant', 'units', 'var_desc', 'long_name']) + + for i, (pollutant, variable) in enumerate(self.pollutant_info.iterrows()): + if variable.get('units') not in ['mol.s-1', 'g.s-1', 'mole/s', 'g/s']: + raise ValueError("'{0}' unit is not supported for CMAQ emission ".format(variable.get('units')) + + "input file. Set mol.s-1 or g.s-1 in the speciation_map file.") + new_pollutant_info.loc[i, 'pollutant'] = pollutant + if variable.get('units') in ['mol.s-1', 'mole/s']: + new_pollutant_info.loc[i, 'units'] = "{:<16}".format('mole/s') + else: + new_pollutant_info.loc[i, 'units'] = "{:<16}".format('g/s') + new_pollutant_info.loc[i, 'var_desc'] = "{:<80}".format(variable.get('description')) + new_pollutant_info.loc[i, 'long_name'] = "{:<16}".format(pollutant) + + new_pollutant_info.set_index('pollutant', inplace=True) + self.logger.write_time_log('CmaqWriter', 'change_pollutant_attributes', timeit.default_timer() - spent_time) + return new_pollutant_info + + def create_tflag(self): + """ + Create the content of the CMAQ variable TFLAG + + :return: Array with the content of TFLAG + :rtype: numpy.array + """ + spent_time = timeit.default_timer() + + a = np.array([[[]]]) + + for date in self.date_array: + b = np.array([[int(date.strftime('%Y%j'))], [int(date.strftime('%H%M%S'))]] * len(self.pollutant_info)) + a = np.append(a, b) + + a.shape = (len(self.date_array), 2, len(self.pollutant_info)) + self.logger.write_time_log('CmaqWriter', 'create_tflag', timeit.default_timer() - spent_time) + return a + + def str_var_list(self): + """ + Transform a list to a string with the elements with 16 white spaces. + + :return: List transformed on string. 
+ :rtype: str + """ + spent_time = timeit.default_timer() + str_var_list = "" + + for var in list(self.pollutant_info.index): + str_var_list += "{:<16}".format(var) + + self.logger.write_time_log('CmaqWriter', 'str_var_list', timeit.default_timer() - spent_time) + return str_var_list + + def read_global_attributes(self, global_attributes_path): + spent_time = timeit.default_timer() + + float_atts = ['VGTOP'] + int_atts = ['FTYPE', 'NTHIK', 'VGTYP'] + str_atts = ['EXEC_ID', 'GDNAM'] + list_float_atts = ['VGLVLS'] + + atts_dict = { + 'EXEC_ID': "{:<80}".format('0.1alpha'), + 'FTYPE': np.int32(1), + 'NTHIK': np.int32(1), + 'VGTYP': np.int32(7), + 'VGTOP': np.float32(5000.), + 'VGLVLS': np.array([1., 0.], dtype=np.float32), + 'GDNAM': "{:<16}".format(''), + } + + df = pd.read_csv(global_attributes_path) + + for att in atts_dict.iterkeys(): + try: + if att in int_atts: + atts_dict[att] = np.int32(df.loc[df['attribute'] == att, 'value'].item()) + elif att in float_atts: + atts_dict[att] = np.float32(df.loc[df['attribute'] == att, 'value'].item()) + elif att in str_atts: + atts_dict[att] = str(df.loc[df['attribute'] == att, 'value'].item()) + if att == 'EXEC_ID': + atts_dict[att] = '{:<80}'.format(atts_dict[att]) + elif att == 'GDNAM': + atts_dict[att] = '{:<16}'.format(atts_dict[att]) + elif att in list_float_atts: + atts_dict[att] = np.array(df.loc[df['attribute'] == att, 'value'].item().split(), + dtype=np.float32) + except ValueError: + self.logger.write_log("WARNING: The global attribute {0} is not defined;".format(att) + + " Using default value '{0}'".format(atts_dict[att])) + if self.comm_write.Get_rank() == 0: + warn('WARNING: The global attribute {0} is not defined; Using default value {1}'.format( + att, atts_dict[att])) + + self.logger.write_time_log('CmaqWriter', 'read_global_attributes', timeit.default_timer() - spent_time) + return atts_dict + + def create_global_attributes(self, global_attributes_path): + """ + Create the global attributes and the order that they have to be filled. + + :return: Dict of global attributes and a list with the keys ordered. + :rtype: tuple + """ + from datetime import datetime + spent_time = timeit.default_timer() + + global_attributes = self.read_global_attributes(global_attributes_path) + + tstep = 1 * 10000 + + now = datetime.now() + global_attributes['IOAPI_VERSION'] = 'None: made only with NetCDF libraries' + global_attributes['CDATE'] = np.int32(now.strftime('%Y%j')) + global_attributes['CTIME'] = np.int32(now.strftime('%H%M%S')) + global_attributes['WDATE'] = np.int32(now.strftime('%Y%j')) + global_attributes['WTIME'] = np.int32(now.strftime('%H%M%S')) + global_attributes['SDATE'] = np.int32(self.date_array[0].strftime('%Y%j')) + global_attributes['STIME'] = np.int32(self.date_array[0].strftime('%H%M%S')) + global_attributes['TSTEP'] = np.int32(tstep) + global_attributes['NLAYS'] = np.int32(len(self.grid.vertical_desctiption)) + global_attributes['NVARS'] = np.int32(len(self.pollutant_info)) + global_attributes['UPNAM'] = "{:<16}".format('HERMESv3') + global_attributes['FILEDESC'] = 'Emissions generated by HERMESv3_BU.' + global_attributes['HISTORY'] = \ + 'Code developed by Barcelona Supercomputing Center (BSC, https://www.bsc.es/).' + \ + 'Developer: Carles Tena Medina (carles.tena@bsc.es), Marc Guevara Vilardell. 
(marc.guevara@bsc.es) ' + global_attributes['VAR-LIST'] = self.str_var_list() + + if self.grid.grid_type == 'Lambert Conformal Conic': + global_attributes['GDTYP'] = np.int32(2) + global_attributes['NCOLS'] = np.int32(self.grid.attributes['nx']) + global_attributes['NROWS'] = np.int32(self.grid.attributes['ny']) + global_attributes['P_ALP'] = np.float(self.grid.attributes['lat_1']) + global_attributes['P_BET'] = np.float(self.grid.attributes['lat_2']) + global_attributes['P_GAM'] = np.float(self.grid.attributes['lon_0']) + global_attributes['XCENT'] = np.float(self.grid.attributes['lon_0']) + global_attributes['YCENT'] = np.float(self.grid.attributes['lat_0']) + global_attributes['XORIG'] = np.float(self.grid.attributes['x_0']) - np.float( + self.grid.attributes['inc_x']) / 2 + global_attributes['YORIG'] = np.float(self.grid.attributes['y_0']) - np.float( + self.grid.attributes['inc_y']) / 2 + global_attributes['XCELL'] = np.float(self.grid.attributes['inc_x']) + global_attributes['YCELL'] = np.float(self.grid.attributes['inc_y']) + + self.logger.write_time_log('CmaqWriter', 'create_global_attributes', timeit.default_timer() - spent_time) + return global_attributes + + def write_netcdf(self, emissions): + """ + Create a NetCDF following the IOAPIv3.2 (CMAQ) conventions + + :param emissions: Emissions to write in the NetCDF with 'FID, level & time step as index and pollutant as + columns. + :type emissions: DataFrame + """ + spent_time = timeit.default_timer() + + netcdf = Dataset(self.netcdf_path, mode='w', parallel=True, comm=self.comm_write, info=MPI.Info()) + + # ===== DIMENSIONS ===== + self.logger.write_log('\tCreating NetCDF dimensions', message_level=2) + netcdf.createDimension('TSTEP', len(self.date_array)) + netcdf.createDimension('DATE-TIME', 2) + netcdf.createDimension('LAY', len(self.grid.vertical_desctiption)) + netcdf.createDimension('VAR', len(self.pollutant_info)) + netcdf.createDimension('ROW', self.grid.center_latitudes.shape[0]) + netcdf.createDimension('COL', self.grid.center_longitudes.shape[1]) + + # ========== VARIABLES ========== + self.logger.write_log('\tCreating NetCDF variables', message_level=2) + tflag = netcdf.createVariable('TFLAG', 'i', ('TSTEP', 'VAR', 'DATE-TIME',)) + tflag.setncatts({'units': "{:<16}".format(''), 'long_name': "{:<16}".format('TFLAG'), + 'var_desc': "{:<80}".format('Timestep-valid flags: (1) YYYYDDD or (2) HHMMSS')}) + tflag[:] = self.create_tflag() + + # ========== POLLUTANTS ========== + for var_name in emissions.columns.values: + self.logger.write_log('\t\tCreating {0} variable'.format(var_name), message_level=3) + + var_data = self.dataframe_to_array(emissions.loc[:, [var_name]]) + var = netcdf.createVariable(var_name, np.float64, ('TSTEP', 'LAY', 'ROW', 'COL',)) + var[:, :, + self.rank_distribution[self.comm_write.Get_rank()]['y_min']: + self.rank_distribution[self.comm_write.Get_rank()]['y_max'], + self.rank_distribution[self.comm_write.Get_rank()]['x_min']: + self.rank_distribution[self.comm_write.Get_rank()]['x_max']] = var_data + + var.long_name = self.pollutant_info.loc[var_name, 'long_name'] + var.units = self.pollutant_info.loc[var_name, 'units'] + var.var_desc = self.pollutant_info.loc[var_name, 'var_desc'] + + # ========== METADATA ========== + self.logger.write_log('\tCreating NetCDF metadata', message_level=2) + + for attribute in self.global_attributes_order: + netcdf.setncattr(attribute, self.global_attributes[attribute]) + + netcdf.close() + self.logger.write_log('NetCDF write at {0}'.format(self.netcdf_path)) + 
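
# Worked illustration of the IOAPI origin attributes written above: XORIG/YORIG
# are shifted half a grid cell from the x_0/y_0 stored in the grid attributes
# (consistent with x_0/y_0 being cell-centre coordinates). The numbers below
# are hypothetical, not taken from any real domain.
inc_x = inc_y = 10000.0                 # cell size in metres
x_0, y_0 = 250000.0, 40000.0            # hypothetical projection origin
xorig = x_0 - inc_x / 2                 # 245000.0
yorig = y_0 - inc_y / 2                 #  35000.0
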
self.logger.write_time_log('CmaqWriter', 'write_netcdf', timeit.default_timer() - spent_time) + + return True diff --git a/hermesv3_bu/writer/default_writer.py b/hermesv3_bu/writer/default_writer.py new file mode 100755 index 0000000000000000000000000000000000000000..8076de535e6c966a1b14fd0a2a9cf2513611cb6b --- /dev/null +++ b/hermesv3_bu/writer/default_writer.py @@ -0,0 +1,262 @@ +#!/usr/bin/env python + +import numpy as np +from netCDF4 import Dataset, date2num +from hermesv3_bu.writer.writer import Writer +from mpi4py import MPI +import timeit +from hermesv3_bu.logger.log import Log +import time + + +class DefaultWriter(Writer): + def __init__(self, comm_world, comm_write, logger, netcdf_path, grid, date_array, pollutant_info, + rank_distribution, emission_summary=False): + """ + Initialise the Default writer that will write a NetCDF CF-1.6 complient. + + :param comm_world: Global communicator for all the calculation process + :type comm_world: MPI.COMM + + :param comm_write: Sector communicator. + :type comm_write: MPI.Intracomm + + :param logger: Logger + :type logger: Log + + :param netcdf_path: Path to the output NetCDF file- + :type netcdf_path: str + + :param grid: Output grid definition. + :type grid: hermesv3_bu.grids.grid.Grid + + :param date_array: Array with each time step to be calculated. + :type date_array: list of datetime.datetime + + :param pollutant_info: Information related with the output pollutants, short description, units... + :type pollutant_info: DataFrame + + :param rank_distribution: Information of the writing process. That argument is a dictionary with the writing + process rank as key and another dictionary as value. That other dictionary contains: + - shape: Shape to write + - x_min: X minimum position to write on the full array. + - x_max: X maximum position to write on the full array. + - y_min: Y minimum position to write on the full array. + - y_max: Y maximum position to write on the full array. + - fid_min: Minimum cell ID of a flatten X Y domain. + - fid_max: Maximum cell ID of a flatten X Y domain. + + e.g. 24 time steps. 48 vertical levels, 10 x 10 + {0: {'fid_min': 0, 'y_min': 0, 'y_max': 5, 'fid_max': 50, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}, + 1: {'fid_min': 50, 'y_min': 5, 'y_max': 10, 'fid_max': 100, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}} + :type rank_distribution: dict + + :param emission_summary: Indicates if you want to create the emission summary files. + :type emission_summary: bool + """ + spent_time = timeit.default_timer() + logger.write_log('Default writer selected.') + super(DefaultWriter, self).__init__(comm_world, comm_write, logger, netcdf_path, grid, date_array, + pollutant_info, rank_distribution, emission_summary) + + self.logger.write_time_log('DefaultWriter', '__init__', timeit.default_timer() - spent_time) + + def unit_change(self, emissions): + """ + No unit changes. + + :param emissions: Emissions on dataframe. + :type emissions: DataFrame + + :return: Same emissions as input + :rtype: DataFrame + """ + self.logger.write_time_log('DefaultWriter', 'unit_change', 0.0) + + return emissions + + def write_netcdf(self, emissions): + """ + Create a NetCDF following the CF-1.6 conventions + + :param emissions: Emissions to write in the NetCDF with 'FID, level & time step as index and pollutant as + columns. 
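
# Hypothetical illustration of the emissions layout the writers receive: a
# (FID, layer, tstep) MultiIndex with one column per output pollutant. The
# pollutant names and values below are made up.
import numpy as np
import pandas as pd

idx = pd.MultiIndex.from_product([range(4), [0], range(2)],
                                 names=['FID', 'layer', 'tstep'])
example_emissions = pd.DataFrame({'nox_no2': np.arange(8, dtype=np.float64),
                                  'co': np.ones(8)}, index=idx)
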
+ :type emissions: DataFrame + """ + from cf_units import Unit + spent_time = timeit.default_timer() + + if self.comm_write.Get_size() > 1: + netcdf = Dataset(self.netcdf_path, format="NETCDF4", mode='w', + parallel=True, comm=self.comm_write, info=MPI.Info()) + else: + netcdf = Dataset(self.netcdf_path, format="NETCDF4", mode='w') + + # ========== DIMENSIONS ========== + self.logger.write_log('\tCreating NetCDF dimensions', message_level=2) + if self.grid.grid_type == 'Regular Lat-Lon': + netcdf.createDimension('lat', self.grid.center_latitudes.shape[0]) + netcdf.createDimension('lon', self.grid.center_longitudes.shape[0]) + var_dim = ('lat', 'lon',) + lat_dim = ('lat',) + lon_dim = ('lon',) + + elif self.grid.grid_type in ['Lambert Conformal Conic', 'Mercator']: + netcdf.createDimension('y', len(self.grid.y)) + netcdf.createDimension('x', len(self.grid.x)) + var_dim = ('y', 'x',) + lat_dim = lon_dim = var_dim + + elif self.grid.grid_type == 'Rotated': + netcdf.createDimension('rlat', len(self.grid.rlat)) + netcdf.createDimension('rlon', len(self.grid.rlon)) + var_dim = ('rlat', 'rlon') + lat_dim = lon_dim = var_dim + else: + var_dim = lat_dim = lon_dim = None + + netcdf.createDimension('nv', len(self.grid.boundary_latitudes[0, 0])) + + netcdf.createDimension('lev', len(self.grid.vertical_desctiption)) + netcdf.createDimension('time', len(self.date_array)) + + # ========== VARIABLES ========== + self.logger.write_log('\tCreating NetCDF variables', message_level=2) + self.logger.write_log('\t\tCreating time variable', message_level=3) + + time_var = netcdf.createVariable('time', np.float64, ('time',)) + time_var.units = 'hours since {0}'.format(self.date_array[0].strftime("%Y-%m-%d %H:%M:%S")) + time_var.standard_name = "time" + time_var.calendar = "gregorian" + time_var.long_name = "time" + time_var[:] = date2num(self.date_array, time_var.units, calendar=time_var.calendar) + + self.logger.write_log('\t\tCreating lev variable', message_level=3) + lev = netcdf.createVariable('lev', np.float64, ('lev',)) + lev.units = Unit("m").symbol + lev.positive = 'up' + lev[:] = self.grid.vertical_desctiption + + self.logger.write_log('\t\tCreating lat variable', message_level=3) + lats = netcdf.createVariable('lat', np.float64, lat_dim) + lats.units = "degrees_north" + lats.axis = "Y" + lats.long_name = "latitude coordinate" + lats.standard_name = "latitude" + lats[:] = self.grid.center_latitudes + lats.bounds = "lat_bnds" + lat_bnds = netcdf.createVariable('lat_bnds', np.float64, lat_dim + ('nv',)) + lat_bnds[:] = self.grid.boundary_latitudes + + self.logger.write_log('\t\tCreating lon variable', message_level=3) + lons = netcdf.createVariable('lon', np.float64, lon_dim) + lons.units = "degrees_east" + lons.axis = "X" + lons.long_name = "longitude coordinate" + lons.standard_name = "longitude" + lons[:] = self.grid.center_longitudes + lons.bounds = "lon_bnds" + lon_bnds = netcdf.createVariable('lon_bnds', np.float64, lon_dim + ('nv',)) + lon_bnds[:] = self.grid.boundary_longitudes + + if self.grid.grid_type in ['Lambert Conformal Conic', 'Mercator']: + self.logger.write_log('\t\tCreating x variable', message_level=3) + x_var = netcdf.createVariable('x', np.float64, ('x',)) + x_var.units = Unit("km").symbol + x_var.long_name = "x coordinate of projection" + x_var.standard_name = "projection_x_coordinate" + x_var[:] = self.grid.x + + self.logger.write_log('\t\tCreating y variable', message_level=3) + y_var = netcdf.createVariable('y', np.float64, ('y',)) + y_var.units = Unit("km").symbol + 
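
# Standalone check of the time coordinate written above: date2num() converts
# the datetime list into offsets from the reference date in the units string.
# The dates are illustrative.
from datetime import datetime, timedelta
from netCDF4 import date2num

example_dates = [datetime(2016, 11, 29) + timedelta(hours=h) for h in range(3)]
time_units = 'hours since {0}'.format(example_dates[0].strftime("%Y-%m-%d %H:%M:%S"))
print(date2num(example_dates, time_units, calendar='gregorian'))   # [0. 1. 2.]
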
y_var.long_name = "y coordinate of projection" + y_var.standard_name = "projection_y_coordinate" + y_var[:] = self.grid.y + + elif self.grid.grid_type == 'Rotated': + self.logger.write_log('\t\tCreating rlat variable', message_level=3) + rlat = netcdf.createVariable('rlat', np.float64, ('rlat',)) + rlat.long_name = "latitude in rotated pole grid" + rlat.units = Unit("degrees").symbol + rlat.standard_name = "grid_latitude" + rlat[:] = self.grid.rlat + + # Rotated Longitude + self.logger.write_log('\t\tCreating rlon variable', message_level=3) + rlon = netcdf.createVariable('rlon', np.float64, ('rlon',)) + rlon.long_name = "longitude in rotated pole grid" + rlon.units = Unit("degrees").symbol + rlon.standard_name = "grid_longitude" + rlon[:] = self.grid.rlon + + # ========== POLLUTANTS ========== + # if 'Unnamed: 0' in emissions.columns.values: + # emissions.drop(columns=['Unnamed: 0'], inplace=True) + for var_name in emissions.columns.values: + self.logger.write_log('\t\tCreating {0} variable'.format(var_name), message_level=3) + var_data = self.dataframe_to_array(emissions.loc[:, [var_name]]) + # var = netcdf.createVariable(var_name, np.float64, ('time', 'lev',) + var_dim, + # chunksizes=self.rank_distribution[0]['shape']) + var = netcdf.createVariable(var_name, np.float64, ('time', 'lev',) + var_dim) + if self.comm_write.Get_size() > 1: + var.set_collective(True) + + var[:, :, + self.rank_distribution[self.comm_write.Get_rank()]['y_min']: + self.rank_distribution[self.comm_write.Get_rank()]['y_max'], + self.rank_distribution[self.comm_write.Get_rank()]['x_min']: + self.rank_distribution[self.comm_write.Get_rank()]['x_max']] = var_data + + var.long_name = self.pollutant_info.loc[var_name, 'description'] + var.units = self.pollutant_info.loc[var_name, 'units'] + var.missing_value = -999.0 + var.coordinates = 'lat lon' + if self.grid.grid_type == 'Regular Lat-Lon': + var.grid_mapping = 'Latitude_Longitude' + elif self.grid.grid_type == 'Lambert Conformal Conic': + var.grid_mapping = 'Lambert_Conformal' + elif self.grid.grid_type == 'Rotated': + var.grid_mapping = 'rotated_pole' + elif self.grid.grid_type == 'Mercator': + var.grid_mapping = 'mercator' + + # ========== METADATA ========== + self.logger.write_log('\tCreating NetCDF metadata', message_level=2) + + self.logger.write_log('\t\tCreating Coordinate Reference System metadata', message_level=3) + + if self.grid.grid_type == 'Regular Lat-Lon': + mapping = netcdf.createVariable('Latitude_Longitude', 'i') + mapping.grid_mapping_name = "latitude_longitude" + mapping.semi_major_axis = 6371000.0 + mapping.inverse_flattening = 0 + + elif self.grid.grid_type == 'Lambert Conformal Conic': + mapping = netcdf.createVariable('Lambert_Conformal', 'i') + mapping.grid_mapping_name = "lambert_conformal_conic" + mapping.standard_parallel = "{0}, {1}".format(self.grid.attributes['lat_1'], self.grid.attributes['lat_2']) + mapping.longitude_of_central_meridian = self.grid.attributes['lon_0'] + mapping.latitude_of_projection_origin = self.grid.attributes['lat_0'] + + elif self.grid.grid_type == 'Rotated': + mapping = netcdf.createVariable('rotated_pole', 'c') + mapping.grid_mapping_name = 'rotated_latitude_longitude' + mapping.grid_north_pole_latitude = 90 - self.grid.attributes['new_pole_latitude_degrees'] + mapping.grid_north_pole_longitude = self.grid.attributes['new_pole_longitude_degrees'] + + elif self.grid.grid_type == 'Mercator': + mapping = netcdf.createVariable('mercator', 'i') + mapping.grid_mapping_name = "mercator" + 
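
# Plain-numpy sketch (no MPI, no NetCDF) of the slab assignment in the
# pollutant loop above: each writing rank fills only its own
# [y_min:y_max, x_min:x_max] window of the global (time, lev, y, x) array.
# The distribution below matches the two-rank example from the class docstring.
import numpy as np

example_distribution = {
    0: {'y_min': 0, 'y_max': 5, 'x_min': 0, 'x_max': 10, 'shape': (24, 48, 5, 10)},
    1: {'y_min': 5, 'y_max': 10, 'x_min': 0, 'x_max': 10, 'shape': (24, 48, 5, 10)},
}
full_array = np.zeros((24, 48, 10, 10))
for rank, info in example_distribution.items():
    slab = np.full(info['shape'], float(rank))   # stands in for dataframe_to_array()
    full_array[:, :, info['y_min']:info['y_max'], info['x_min']:info['x_max']] = slab
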
mapping.longitude_of_projection_origin = self.grid.attributes['lon_0'] + mapping.standard_parallel = self.grid.attributes['lat_ts'] + + netcdf.setncattr('Conventions', 'CF-1.6') + self.comm_write.Barrier() + netcdf.close() + self.logger.write_log('NetCDF write at {0}'.format(self.netcdf_path)) + self.logger.write_time_log('DefaultWriter', 'write_netcdf', timeit.default_timer() - spent_time) + + return True diff --git a/hermesv3_bu/writer/monarch_writer.py b/hermesv3_bu/writer/monarch_writer.py new file mode 100755 index 0000000000000000000000000000000000000000..7a00324e5c14c92f35110ca794fa642f6674f35e --- /dev/null +++ b/hermesv3_bu/writer/monarch_writer.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python + +import numpy as np +from netCDF4 import Dataset, date2num +from hermesv3_bu.writer.writer import Writer +from mpi4py import MPI +import timeit +from hermesv3_bu.logger.log import Log + + +class MonarchWriter(Writer): + def __init__(self, comm_world, comm_write, logger, netcdf_path, grid, date_array, pollutant_info, + rank_distribution, emission_summary=False): + """ + Initialise the MONARCH writer that will write a NetCDF CF-1.6 complient. + + :param comm_world: Global communicator for all the calculation process + :type comm_world: MPI.COMM + + :param comm_write: Sector communicator. + :type comm_write: MPI.Intracomm + + :param logger: Logger + :type logger: Log + + :param netcdf_path: Path to the output NetCDF file- + :type netcdf_path: str + + :param grid: Output grid definition. + :type grid: hermesv3_bu.grids.grid.Grid + + :param date_array: Array with each time step to be calculated. + :type date_array: list of datetime.datetime + + :param pollutant_info: Information related with the output pollutants, short description, units... + :type pollutant_info: DataFrame + + :param rank_distribution: Information of the writing process. That argument is a dictionary with the writing + process rank as key and another dictionary as value. That other dictionary contains: + - shape: Shape to write + - x_min: X minimum position to write on the full array. + - x_max: X maximum position to write on the full array. + - y_min: Y minimum position to write on the full array. + - y_max: Y maximum position to write on the full array. + - fid_min: Minimum cell ID of a flatten X Y domain. + - fid_max: Maximum cell ID of a flatten X Y domain. + + e.g. 24 time steps. 48 vertical levels, 10 x 10 + {0: {'fid_min': 0, 'y_min': 0, 'y_max': 5, 'fid_max': 50, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}, + 1: {'fid_min': 50, 'y_min': 5, 'y_max': 10, 'fid_max': 100, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}} + :type rank_distribution: dict + + :param emission_summary: Indicates if you want to create the emission summary files. + :type emission_summary: bool + """ + spent_time = timeit.default_timer() + logger.write_log('MONARCH writer selected.') + + super(MonarchWriter, self).__init__(comm_world, comm_write, logger, netcdf_path, grid, date_array, + pollutant_info, rank_distribution, emission_summary) + + if self.grid.grid_type not in ['Rotated']: + raise TypeError("ERROR: Only Rotated grid is implemented for MONARCH. " + + "The current grid type is '{0}'".format(self.grid.grid_type)) + + for i, (pollutant, variable) in enumerate(self.pollutant_info.iterrows()): + if variable.get('units') not in ['mol.s-1.m-2', 'kg.s-1.m-2']: + raise ValueError("'{0}' unit is not supported for CMAQ emission ".format(variable.get('units')) + + "input file. 
Set mol.s-1.m-2 or kg.s-1.m-2 in the speciation_map file.") + + self.logger.write_time_log('MonarchWriter', '__init__', timeit.default_timer() - spent_time) + + def unit_change(self, emissions): + """ + From mol/h or g/h to mol/km.s or g/km.s + + :param emissions: Emissions on dataframe. + :type emissions: DataFrame + + :return: Same emissions as input + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + if self.comm_write.Get_rank() == 0: + self.grid.add_cell_area() + cell_area = self.grid.shapefile[['FID', 'cell_area']] + cell_area.set_index('FID', inplace=True) + else: + cell_area = None + cell_area = self.comm_write.bcast(cell_area, root=0) + + # From mol/h g/h to mol/m2.s g/m2.s + emissions = emissions.divide(cell_area['cell_area'].mul(3600), axis=0, level='FID') + + for pollutant, info in self.pollutant_info.iterrows(): + if info.get('units') == "kg.s-1.m-2": + # From g.s-1.m-2 to kg.s-1.m-2 + emissions[[pollutant]] = emissions[[pollutant]].div(10**3) + self.logger.write_time_log('MonarchWriter', '__init__', timeit.default_timer() - spent_time) + + return emissions + + def write_netcdf(self, emissions): + """ + Create a NetCDF following the CF-1.6 conventions + + :param emissions: Emissions to write in the NetCDF with 'FID, level & time step as index and pollutant as + columns. + :type emissions: DataFrame + """ + from cf_units import Unit + spent_time = timeit.default_timer() + netcdf = Dataset(self.netcdf_path, mode='w', parallel=True, comm=self.comm_write, info=MPI.Info()) + + # ========== DIMENSIONS ========== + self.logger.write_log('\tCreating NetCDF dimensions', message_level=2) + + netcdf.createDimension('rlat', len(self.grid.rlat)) + netcdf.createDimension('rlon', len(self.grid.rlon)) + var_dim = ('rlat', 'rlon') + lat_dim = lon_dim = var_dim + + netcdf.createDimension('nv', len(self.grid.boundary_latitudes[0, 0])) + + netcdf.createDimension('lev', len(self.grid.vertical_desctiption)) + netcdf.createDimension('time', len(self.date_array)) + + # ========== VARIABLES ========== + self.logger.write_log('\tCreating NetCDF variables', message_level=2) + self.logger.write_log('\t\tCreating time variable', message_level=3) + + time = netcdf.createVariable('time', np.float64, ('time',)) + time.units = 'hours since {0}'.format(self.date_array[0].strftime("%Y-%m-%d %H:%M:%S")) + time.standard_name = "time" + time.calendar = "gregorian" + time.long_name = "time" + time[:] = date2num(self.date_array, time.units, calendar=time.calendar) + + self.logger.write_log('\t\tCreating lev variable', message_level=3) + lev = netcdf.createVariable('lev', np.float64, ('lev',)) + lev.units = Unit("m").symbol + lev.positive = 'up' + lev[:] = self.grid.vertical_desctiption + + self.logger.write_log('\t\tCreating lat variable', message_level=3) + lats = netcdf.createVariable('lat', np.float64, lat_dim) + lats.units = "degrees_north" + lats.axis = "Y" + lats.long_name = "latitude coordinate" + lats.standard_name = "latitude" + lats[:] = self.grid.center_latitudes + lats.bounds = "lat_bnds" + lat_bnds = netcdf.createVariable('lat_bnds', np.float64, lat_dim + ('nv',)) + lat_bnds[:] = self.grid.boundary_latitudes + + self.logger.write_log('\t\tCreating lon variable', message_level=3) + lons = netcdf.createVariable('lon', np.float64, lon_dim) + lons.units = "degrees_east" + lons.axis = "X" + lons.long_name = "longitude coordinate" + lons.standard_name = "longitude" + lons[:] = self.grid.center_longitudes + lons.bounds = "lon_bnds" + lon_bnds = netcdf.createVariable('lon_bnds', np.float64, 
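
# Hedged, self-contained sketch of the flux conversion in unit_change() above:
# per-cell mol/h (or g/h) is divided by the cell area [m2] and by 3600 s, and
# kg.s-1.m-2 species get a further factor 1e-3. All numbers are made up.
import pandas as pd

cell_area = pd.Series([1.0e8, 1.2e8], name='cell_area',
                      index=pd.Index([0, 1], name='FID'))
idx = pd.MultiIndex.from_product([[0, 1], [0], [0, 1]],
                                 names=['FID', 'layer', 'tstep'])
example_emissions = pd.DataFrame({'nox_no2': [3600.0] * 4}, index=idx)   # mol/h

flux = example_emissions.divide(cell_area.mul(3600), axis=0, level='FID')
# rows with FID == 0 become 3600 / (1.0e8 * 3600) = 1.0e-8 mol.s-1.m-2
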
lon_dim + ('nv',)) + lon_bnds[:] = self.grid.boundary_longitudes + + self.logger.write_log('\t\tCreating rlat variable', message_level=3) + rlat = netcdf.createVariable('rlat', np.float64, ('rlat',)) + rlat.long_name = "latitude in rotated pole grid" + rlat.units = Unit("degrees").symbol + rlat.standard_name = "grid_latitude" + rlat[:] = self.grid.rlat + + # Rotated Longitude + self.logger.write_log('\t\tCreating rlon variable', message_level=3) + rlon = netcdf.createVariable('rlon', np.float64, ('rlon',)) + rlon.long_name = "longitude in rotated pole grid" + rlon.units = Unit("degrees").symbol + rlon.standard_name = "grid_longitude" + rlon[:] = self.grid.rlon + + # ========== POLLUTANTS ========== + for var_name in emissions.columns.values: + self.logger.write_log('\t\tCreating {0} variable'.format(var_name), message_level=3) + + var_data = self.dataframe_to_array(emissions.loc[:, [var_name]]) + # var = netcdf.createVariable(var_name, np.float64, ('time', 'lev',) + var_dim, + # chunksizes=self.rank_distribution[0]['shape']) + var = netcdf.createVariable(var_name, np.float64, ('time', 'lev',) + var_dim) + + var[:, :, + self.rank_distribution[self.comm_write.Get_rank()]['y_min']: + self.rank_distribution[self.comm_write.Get_rank()]['y_max'], + self.rank_distribution[self.comm_write.Get_rank()]['x_min']: + self.rank_distribution[self.comm_write.Get_rank()]['x_max']] = var_data + + var.long_name = self.pollutant_info.loc[var_name, 'description'] + var.units = self.pollutant_info.loc[var_name, 'units'] + var.missing_value = -999.0 + var.coordinates = 'lat lon' + var.grid_mapping = 'rotated_pole' + + # ========== METADATA ========== + self.logger.write_log('\tCreating NetCDF metadata', message_level=2) + + self.logger.write_log('\t\tCreating Coordinate Reference System metadata', message_level=3) + + mapping = netcdf.createVariable('rotated_pole', 'c') + mapping.grid_mapping_name = 'rotated_latitude_longitude' + mapping.grid_north_pole_latitude = 90 - self.grid.attributes['new_pole_latitude_degrees'] + mapping.grid_north_pole_longitude = self.grid.attributes['new_pole_longitude_degrees'] + + netcdf.setncattr('Conventions', 'CF-1.6') + netcdf.close() + self.logger.write_log('NetCDF write at {0}'.format(self.netcdf_path)) + self.logger.write_time_log('MonarchWriter', 'write_netcdf', timeit.default_timer() - spent_time) + + return True diff --git a/hermesv3_bu/writer/wrfchem_writer.py b/hermesv3_bu/writer/wrfchem_writer.py new file mode 100755 index 0000000000000000000000000000000000000000..693727c136d67ef889a45a804398b27bf693cfa9 --- /dev/null +++ b/hermesv3_bu/writer/wrfchem_writer.py @@ -0,0 +1,375 @@ +#!/usr/bin/env python + +import numpy as np +import pandas as pd +from warnings import warn +import sys +from netCDF4 import Dataset, date2num +from hermesv3_bu.writer.writer import Writer +from mpi4py import MPI +import timeit +from hermesv3_bu.logger.log import Log + + +class WrfChemWriter(Writer): + def __init__(self, comm_world, comm_write, logger, netcdf_path, grid, date_array, pollutant_info, + rank_distribution, global_attributes_path, emission_summary=False): + """ + Initialise the WRF-Chem writer that will write a NetCDF in the CMAQ input format (IOAPIv3.2). + + :param comm_world: Global communicator for all the calculation process + :type comm_world: MPI.COMM + + :param comm_write: Sector communicator. 
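
# Self-contained miniature of the CF-1.6 pattern used by this writer: a scalar
# 'rotated_pole' variable carries the projection parameters and each data
# variable references it through 'grid_mapping'. File name, sizes, pollutant
# name and pole values are illustrative only.
import numpy as np
from netCDF4 import Dataset

nc = Dataset('rotated_example.nc', mode='w')
nc.createDimension('rlat', 3)
nc.createDimension('rlon', 4)
mapping = nc.createVariable('rotated_pole', 'c')
mapping.grid_mapping_name = 'rotated_latitude_longitude'
mapping.grid_north_pole_latitude = 39.25
mapping.grid_north_pole_longitude = -162.0
var = nc.createVariable('nox_no2', np.float64, ('rlat', 'rlon'))
var.grid_mapping = 'rotated_pole'
var[:] = np.zeros((3, 4))
nc.setncattr('Conventions', 'CF-1.6')
nc.close()
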
+ :type comm_write: MPI.Intracomm + + :param logger: Logger + :type logger: Log + + :param netcdf_path: Path to the output NetCDF file- + :type netcdf_path: str + + :param grid: Output grid definition. + :type grid: hermesv3_bu.grids.grid.Grid + + :param date_array: Array with each time step to be calculated. + :type date_array: list of datetime.datetime + + :param pollutant_info: Information related with the output pollutants, short description, units... + :type pollutant_info: DataFrame + + :param rank_distribution: Information of the writing process. That argument is a dictionary with the writing + process rank as key and another dictionary as value. That other dictionary contains: + - shape: Shape to write + - x_min: X minimum position to write on the full array. + - x_max: X maximum position to write on the full array. + - y_min: Y minimum position to write on the full array. + - y_max: Y maximum position to write on the full array. + - fid_min: Minimum cell ID of a flatten X Y domain. + - fid_max: Maximum cell ID of a flatten X Y domain. + + e.g. 24 time steps. 48 vertical levels, 10 x 10 + {0: {'fid_min': 0, 'y_min': 0, 'y_max': 5, 'fid_max': 50, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}, + 1: {'fid_min': 50, 'y_min': 5, 'y_max': 10, 'fid_max': 100, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}} + :type rank_distribution: dict + + :param global_attributes_path: Path to the file that contains the static global attributes. + :type global_attributes_path: str + + :param emission_summary: Indicates if you want to create the emission summary files. + :type emission_summary: bool + """ + spent_time = timeit.default_timer() + logger.write_log('WRF-Chem writer selected.') + + super(WrfChemWriter, self).__init__(comm_world, comm_write, logger, netcdf_path, grid, date_array, + pollutant_info, rank_distribution, emission_summary) + if self.grid.grid_type not in ['Lambert Conformal Conic', 'Mercator']: + raise TypeError("ERROR: Only Lambert Conformal Conic or Mercator grid is implemented for WRF-Chem. 
" + + "The current grid type is '{0}'".format(self.grid.grid_type)) + + self.global_attributes_order = [ + 'TITLE', 'START_DATE', 'WEST-EAST_GRID_DIMENSION', 'SOUTH-NORTH_GRID_DIMENSION', + 'BOTTOM-TOP_GRID_DIMENSION', 'DX', 'DY', 'GRIDTYPE', 'DIFF_OPT', 'KM_OPT', 'DAMP_OPT', 'DAMPCOEF', 'KHDIF', + 'KVDIF', 'MP_PHYSICS', 'RA_LW_PHYSICS', 'RA_SW_PHYSICS', 'SF_SFCLAY_PHYSICS', 'SF_SURFACE_PHYSICS', + 'BL_PBL_PHYSICS', 'CU_PHYSICS', 'SF_LAKE_PHYSICS', 'SURFACE_INPUT_SOURCE', 'SST_UPDATE', 'GRID_FDDA', + 'GFDDA_INTERVAL_M', 'GFDDA_END_H', 'GRID_SFDDA', 'SGFDDA_INTERVAL_M', 'SGFDDA_END_H', + 'WEST-EAST_PATCH_START_UNSTAG', 'WEST-EAST_PATCH_END_UNSTAG', 'WEST-EAST_PATCH_START_STAG', + 'WEST-EAST_PATCH_END_STAG', 'SOUTH-NORTH_PATCH_START_UNSTAG', 'SOUTH-NORTH_PATCH_END_UNSTAG', + 'SOUTH-NORTH_PATCH_START_STAG', 'SOUTH-NORTH_PATCH_END_STAG', 'BOTTOM-TOP_PATCH_START_UNSTAG', + 'BOTTOM-TOP_PATCH_END_UNSTAG', 'BOTTOM-TOP_PATCH_START_STAG', 'BOTTOM-TOP_PATCH_END_STAG', 'GRID_ID', + 'PARENT_ID', 'I_PARENT_START', 'J_PARENT_START', 'PARENT_GRID_RATIO', 'DT', 'CEN_LAT', 'CEN_LON', + 'TRUELAT1', 'TRUELAT2', 'MOAD_CEN_LAT', 'STAND_LON', 'POLE_LAT', 'POLE_LON', 'GMT', 'JULYR', 'JULDAY', + 'MAP_PROJ', 'MMINLU', 'NUM_LAND_CAT', 'ISWATER', 'ISLAKE', 'ISICE', 'ISURBAN', 'ISOILWATER'] + + self.global_attributes = self.create_global_attributes(global_attributes_path) + self.pollutant_info = self.change_pollutant_attributes() + + self.logger.write_time_log('WrfChemWriter', '__init__', timeit.default_timer() - spent_time) + + def unit_change(self, emissions): + """ + Change the units from mol/h or g/h to mol/s or g/s. + + :param emissions: Emissions on dataframe. + :type emissions: DataFrame + + :return: Same emissions as input + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + if self.comm_write.Get_rank() == 0: + self.grid.add_cell_area() + + cell_area = self.grid.shapefile[['FID', 'cell_area']] + cell_area.set_index('FID', inplace=True) + else: + cell_area = None + cell_area = self.comm_write.bcast(cell_area, root=0) + + # From mol/h or g/h to mol/m2.h or g/m2.h + emissions = emissions.divide(cell_area['cell_area'], axis=0, level='FID') + + for pollutant, info in self.pollutant_info.iterrows(): + if info.get('units') == "ug/m3 m/s": + # From g/m2.h to ug/m2.s + emissions[[pollutant]] = emissions[[pollutant]].mul(10**6 / 3600) + elif info.get('units') == "mol km^-2 hr^-1": + # From mol/m2.h to mol/km2.h + emissions[[pollutant]] = emissions[[pollutant]].mul(10**6) + + self.logger.write_time_log('WrfChemWriter', 'unit_change', timeit.default_timer() - spent_time) + return emissions + + def change_pollutant_attributes(self): + """ + Modify the emission list to be consistent to use the output as input for CMAQ model. + + :return: Emission list ready for CMAQ + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + new_pollutant_info = pd.DataFrame(columns=['pollutant', 'units', 'FieldType', 'MemoryOrder', 'description', + 'stagger', 'coordinates']) + + for i, (pollutant, variable) in enumerate(self.pollutant_info.iterrows()): + if variable.get('units') not in ['mol.h-1.km-2', "mol km^-2 hr^-1", 'ug.s-1.m-2', "ug/m3 m/s"]: + raise ValueError("'{0}' unit is not supported for WRF-Chem emission ".format(variable.get('units')) + + "input file. 
Set '{0}' in the speciation_map file.".format( + ['mol.h-1.km-2', "mol km^-2 hr^-1", 'ug.s-1.m-2', "ug/m3 m/s"])) + + new_pollutant_info.loc[i, 'pollutant'] = pollutant + if variable.get('units') in ['mol.h-1.km-2', "mol km^-2 hr^-1"]: + new_pollutant_info.loc[i, 'units'] = "mol km^-2 hr^-1" + else: + new_pollutant_info.loc[i, 'units'] = "ug/m3 m/s" + + new_pollutant_info.loc[i, 'FieldType'] = np.int32(104) + new_pollutant_info.loc[i, 'MemoryOrder'] = "XYZ" + new_pollutant_info.loc[i, 'description'] = "EMISSIONS" + new_pollutant_info.loc[i, 'stagger'] = "" + new_pollutant_info.loc[i, 'coordinates'] = "XLONG XLAT" + + new_pollutant_info.set_index('pollutant', inplace=True) + self.logger.write_time_log('WrfChemWriter', 'change_pollutant_attributes', timeit.default_timer() - spent_time) + return new_pollutant_info + + def read_global_attributes(self, global_attributes_path): + spent_time = timeit.default_timer() + + float_atts = ['DAMPCOEF', 'KHDIF', 'KVDIF', 'CEN_LAT', 'CEN_LON', 'DT'] + int_atts = [ + 'BOTTOM-TOP_GRID_DIMENSION', 'DIFF_OPT', 'KM_OPT', 'DAMP_OPT', 'MP_PHYSICS', 'RA_LW_PHYSICS', + 'RA_SW_PHYSICS', 'SF_SFCLAY_PHYSICS', 'SF_SURFACE_PHYSICS', 'BL_PBL_PHYSICS', 'CU_PHYSICS', + 'SF_LAKE_PHYSICS', 'SURFACE_INPUT_SOURCE', 'SST_UPDATE', 'GRID_FDDA', 'GFDDA_INTERVAL_M', 'GFDDA_END_H', + 'GRID_SFDDA', 'SGFDDA_INTERVAL_M', 'SGFDDA_END_H', 'BOTTOM-TOP_PATCH_START_UNSTAG', + 'BOTTOM-TOP_PATCH_END_UNSTAG', 'BOTTOM-TOP_PATCH_START_STAG', 'BOTTOM-TOP_PATCH_END_STAG', 'GRID_ID', + 'PARENT_ID', 'I_PARENT_START', 'J_PARENT_START', 'PARENT_GRID_RATIO', 'NUM_LAND_CAT', 'ISWATER', 'ISLAKE', + 'ISICE', 'ISURBAN', 'ISOILWATER', 'HISTORY'] + str_atts = ['GRIDTYPE', 'MMINLU'] + + if self.grid.grid_type == 'Lambert Conformal Conic': + lat_ts = np.float32(self.grid.attributes['lat_0']) + elif self.grid.grid_type == 'Mercator': + lat_ts = np.float32(self.grid.attributes['lat_ts']) + else: + raise TypeError("ERROR: Only Lambert Conformal Conic or Mercator grid is implemented for WRF-Chem. 
" + + "The current grid type is '{0}'".format(self.grid.grid_type)) + + atts_dict = { + 'BOTTOM-TOP_GRID_DIMENSION': np.int32(45), + 'GRIDTYPE': 'C', + 'DIFF_OPT': np.int32(1), + 'KM_OPT': np.int32(4), + 'DAMP_OPT': np.int32(3), + 'DAMPCOEF': np.float32(0.2), + 'KHDIF': np.float32(0.), + 'KVDIF': np.float32(0.), + 'MP_PHYSICS': np.int32(6), + 'RA_LW_PHYSICS': np.int32(4), + 'RA_SW_PHYSICS': np.int32(4), + 'SF_SFCLAY_PHYSICS': np.int32(2), + 'SF_SURFACE_PHYSICS': np.int32(2), + 'BL_PBL_PHYSICS': np.int32(8), + 'CU_PHYSICS': np.int32(0), + 'SF_LAKE_PHYSICS': np.int32(0), + 'SURFACE_INPUT_SOURCE': np.int32(1), + 'SST_UPDATE': np.int32(0), + 'GRID_FDDA': np.int32(0), + 'GFDDA_INTERVAL_M': np.int32(0), + 'GFDDA_END_H': np.int32(0), + 'GRID_SFDDA': np.int32(0), + 'SGFDDA_INTERVAL_M': np.int32(0), + 'SGFDDA_END_H': np.int32(0), + 'BOTTOM-TOP_PATCH_START_UNSTAG': np.int32(1), + 'BOTTOM-TOP_PATCH_END_UNSTAG': np.int32(44), + 'BOTTOM-TOP_PATCH_START_STAG': np.int32(1), + 'BOTTOM-TOP_PATCH_END_STAG': np.int32(45), + 'GRID_ID': np.int32(1), + 'PARENT_ID': np.int32(0), + 'I_PARENT_START': np.int32(1), + 'J_PARENT_START': np.int32(1), + 'PARENT_GRID_RATIO': np.int32(1), + 'DT': np.float32(18.), + 'MMINLU': 'MODIFIED_IGBP_MODIS_NOAH', + 'NUM_LAND_CAT': np.int32(41), + 'ISWATER': np.int32(17), + 'ISLAKE': np.int32(-1), + 'ISICE': np.int32(15), + 'ISURBAN': np.int32(13), + 'ISOILWATER': np.int32(14), + 'CEN_LAT': lat_ts, + 'CEN_LON': np.float32(self.grid.attributes['lon_0']) + } + + df = pd.read_csv(global_attributes_path) + + for att in atts_dict.iterkeys(): + try: + if att in int_atts: + atts_dict[att] = np.int32(df.loc[df['attribute'] == att, 'value'].item()) + elif att in float_atts: + atts_dict[att] = np.float32(df.loc[df['attribute'] == att, 'value'].item()) + elif att in str_atts: + atts_dict[att] = str(df.loc[df['attribute'] == att, 'value'].item()) + + except ValueError: + self.logger.write_log("WARNING: The global attribute {0} is not defined;".format(att) + + " Using default value '{0}'".format(atts_dict[att])) + if self.comm_write.Get_rank() == 0: + warn('WARNING: The global attribute {0} is not defined; Using default value {1}'.format( + att, atts_dict[att])) + + self.logger.write_time_log('WrfChemWriter', 'read_global_attributes', timeit.default_timer() - spent_time) + return atts_dict + + def create_global_attributes(self, global_attributes_path): + """ + Create the global attributes and the order that they have to be filled. + + :return: Dict of global attributes and a list with the keys ordered. + :rtype: tuple + """ + spent_time = timeit.default_timer() + + global_attributes = self.read_global_attributes(global_attributes_path) + + global_attributes['TITLE'] = 'Emissions generated by HERMESv3_BU.' + global_attributes['START_DATE'] = self.date_array[0].strftime("%Y-%m-%d_%H:%M:%S") + global_attributes['JULYR'] = np.int32(self.date_array[0].year) + global_attributes['JULDAY'] = np.int32(self.date_array[0].strftime("%j")) + global_attributes['GMT'] = np.float32(self.date_array[0].hour) + global_attributes['HISTORY'] = \ + 'Code developed by Barcelona Supercomputing Center (BSC, https://www.bsc.es/). ' + \ + 'Developer: Carles Tena Medina (carles.tena@bsc.es), Marc Guevara Vilardell. 
(marc.guevara@bsc.es)' + + if self.grid.grid_type in ['Lambert Conformal Conic', 'Mercator']: + global_attributes['WEST-EAST_GRID_DIMENSION'] = np.int32(self.grid.attributes['nx'] + 1) + global_attributes['SOUTH-NORTH_GRID_DIMENSION'] = np.int32(self.grid.attributes['ny'] + 1) + global_attributes['DX'] = np.float32(self.grid.attributes['inc_x']) + global_attributes['DY'] = np.float32(self.grid.attributes['inc_y']) + global_attributes['SURFACE_INPUT_SOURCE'] = np.int32(1) + global_attributes['WEST-EAST_PATCH_START_UNSTAG'] = np.int32(1) + global_attributes['WEST-EAST_PATCH_END_UNSTAG'] = np.int32(self.grid.attributes['nx']) + global_attributes['WEST-EAST_PATCH_START_STAG'] = np.int32(1) + global_attributes['WEST-EAST_PATCH_END_STAG'] = np.int32(self.grid.attributes['nx'] + 1) + global_attributes['SOUTH-NORTH_PATCH_START_UNSTAG'] = np.int32(1) + global_attributes['SOUTH-NORTH_PATCH_END_UNSTAG'] = np.int32(self.grid.attributes['ny']) + global_attributes['SOUTH-NORTH_PATCH_START_STAG'] = np.int32(1) + global_attributes['SOUTH-NORTH_PATCH_END_STAG'] = np.int32(self.grid.attributes['ny'] + 1) + + global_attributes['POLE_LAT'] = np.float32(90) + global_attributes['POLE_LON'] = np.float32(0) + + if self.grid.grid_type == 'Lambert Conformal Conic': + global_attributes['MAP_PROJ'] = np.int32(1) + global_attributes['TRUELAT1'] = np.float32(self.grid.attributes['lat_1']) + global_attributes['TRUELAT2'] = np.float32(self.grid.attributes['lat_2']) + global_attributes['MOAD_CEN_LAT'] = np.float32(self.grid.attributes['lat_0']) + global_attributes['STAND_LON'] = np.float32(self.grid.attributes['lon_0']) + elif self.grid.grid_type == 'Mercator': + global_attributes['MAP_PROJ'] = np.int32(3) + global_attributes['TRUELAT1'] = np.float32(self.grid.attributes['lat_ts']) + global_attributes['TRUELAT2'] = np.float32(0) + global_attributes['MOAD_CEN_LAT'] = np.float32(self.grid.attributes['lat_ts']) + global_attributes['STAND_LON'] = np.float32(self.grid.attributes['lon_0']) + + self.logger.write_time_log('WrfChemWriter', 'create_global_attributes', timeit.default_timer() - spent_time) + return global_attributes + + def create_times_var(self): + # TODO Documentation + """ + + :return: + """ + import netCDF4 + + aux_times_list = [] + + for date in self.date_array: + aux_times_list.append(date.strftime("%Y-%m-%d_%H:%M:%S")) + + str_out = netCDF4.stringtochar(np.array(aux_times_list)) + return str_out + + def write_netcdf(self, emissions): + """ + Create a NetCDF following the WRF-Chem conventions + + :param emissions: Emissions to write in the NetCDF with 'FID, level & time step as index and pollutant as + columns. 
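
# Standalone check of the 'Times' construction in create_times_var(): each
# date becomes a fixed 19-character string and stringtochar() yields the
# (Time, DateStrLen) character array written below. Dates are illustrative.
from datetime import datetime, timedelta
import numpy as np
import netCDF4

example_dates = [datetime(2016, 11, 29) + timedelta(hours=h) for h in range(2)]
date_strings = np.array([d.strftime("%Y-%m-%d_%H:%M:%S") for d in example_dates])
chars = netCDF4.stringtochar(date_strings)
print(chars.shape)     # (2, 19)
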
+ :type emissions: DataFrame + """ + spent_time = timeit.default_timer() + + netcdf = Dataset(self.netcdf_path, mode='w', parallel=True, comm=self.comm_write, info=MPI.Info()) + + # ===== DIMENSIONS ===== + self.logger.write_log('\tCreating NetCDF dimensions', message_level=2) + netcdf.createDimension('Time', len(self.date_array)) + + netcdf.createDimension('DateStrLen', 19) + netcdf.createDimension('west_east', self.grid.center_longitudes.shape[1]) + netcdf.createDimension('south_north', self.grid.center_latitudes.shape[0]) + netcdf.createDimension('emissions_zdim', len(self.grid.vertical_desctiption)) + + # ========== VARIABLES ========== + self.logger.write_log('\tCreating NetCDF variables', message_level=2) + times = netcdf.createVariable('Times', 'S1', ('Time', 'DateStrLen',)) + times[:] = self.create_times_var() + + # ========== POLLUTANTS ========== + for var_name in emissions.columns.values: + self.logger.write_log('\t\tCreating {0} variable'.format(var_name), message_level=3) + + var_data = self.dataframe_to_array(emissions.loc[:, [var_name]]) + var = netcdf.createVariable(var_name, np.float64, ('Time', 'emissions_zdim', 'south_north', 'west_east',)) + var[:, :, + self.rank_distribution[self.comm_write.Get_rank()]['y_min']: + self.rank_distribution[self.comm_write.Get_rank()]['y_max'], + self.rank_distribution[self.comm_write.Get_rank()]['x_min']: + self.rank_distribution[self.comm_write.Get_rank()]['x_max']] = var_data + + var.FieldType = self.pollutant_info.loc[var_name, 'FieldType'] + var.MemoryOrder = self.pollutant_info.loc[var_name, 'MemoryOrder'] + var.description = self.pollutant_info.loc[var_name, 'description'] + var.units = self.pollutant_info.loc[var_name, 'units'] + var.stagger = self.pollutant_info.loc[var_name, 'stagger'] + var.coordinates = self.pollutant_info.loc[var_name, 'coordinates'] + + # ========== METADATA ========== + self.logger.write_log('\tCreating NetCDF metadata', message_level=2) + + for attribute in self.global_attributes_order: + netcdf.setncattr(attribute, self.global_attributes[attribute]) + + netcdf.close() + self.logger.write_log('NetCDF write at {0}'.format(self.netcdf_path)) + self.logger.write_time_log('WrfChemWriter', 'write_netcdf', timeit.default_timer() - spent_time) + + return True diff --git a/hermesv3_bu/writer/writer.py b/hermesv3_bu/writer/writer.py new file mode 100755 index 0000000000000000000000000000000000000000..5b44d0263497c029020e96ac86bcde2fe7bbd9f1 --- /dev/null +++ b/hermesv3_bu/writer/writer.py @@ -0,0 +1,451 @@ +#!/usr/bin/env python + +import os +import sys +import numpy as np +import pandas as pd +from mpi4py import MPI +from warnings import warn +import timeit +from hermesv3_bu.logger.log import Log + +CHUNKING = True +BALANCED = False +MPI_TAG_CONSTANT = 10**6 + + +def select_writer(logger, arguments, grid, date_array): + """ + Select the writer depending on the arguments passed to HERMESv3_BU + + :param logger: Logger + :type logger: Log + + :param arguments: Arguments passed to HERMESv3_BU + :type arguments: Namespace + + :param grid: Output grid definition. + :type grid: hermesv3_bu.grids.grid.Grid + + :param date_array: Array with each time step to be calculated. + :type date_array: list of datetime.datetime + + :return: Selected writer. 
+ :rtype: Writer + """ + spent_time = timeit.default_timer() + comm_world = MPI.COMM_WORLD + + if grid.shape[2] % 2 == 0: + max_procs = grid.shape[2] // 2 + else: + max_procs = (grid.shape[2] // 2) + 1 + + if arguments.writing_processors > min((comm_world.Get_size(), max_procs)): + warn('Exceeded maximum of writing processors. Setting it to {0}'.format( + min((comm_world.Get_size(), max_procs)))) + + arguments.writing_processors = min((comm_world.Get_size(), max_procs)) + + if BALANCED: + rank_distribution = get_balanced_distribution(logger, arguments.writing_processors, grid.shape) + + else: + rank_distribution = get_distribution(logger, arguments.writing_processors, grid.shape) + + if comm_world.Get_rank() < arguments.writing_processors: + color = 99 + else: + color = 0 + + comm_write = comm_world.Split(color, comm_world.Get_rank()) + + pollutant_info = pd.read_csv(arguments.speciation_map, usecols=['dst', 'description', 'units'], index_col='dst') + pollutant_info = pollutant_info.loc[~pollutant_info.index.duplicated(keep='first')] + + if arguments.output_model == 'DEFAULT': + from hermesv3_bu.writer.default_writer import DefaultWriter + writer = DefaultWriter(comm_world, comm_write, logger, arguments.output_name, grid, date_array, pollutant_info, + rank_distribution, arguments.emission_summary) + elif arguments.output_model == 'MONARCH': + from hermesv3_bu.writer.monarch_writer import MonarchWriter + writer = MonarchWriter(comm_world, comm_write, logger, arguments.output_name, grid, date_array, pollutant_info, + rank_distribution, arguments.emission_summary) + elif arguments.output_model == 'CMAQ': + from hermesv3_bu.writer.cmaq_writer import CmaqWriter + writer = CmaqWriter(comm_world, comm_write, logger, arguments.output_name, grid, date_array, pollutant_info, + rank_distribution, arguments.output_attributes, arguments.emission_summary) + elif arguments.output_model == 'WRF_CHEM': + from hermesv3_bu.writer.wrfchem_writer import WrfChemWriter + writer = WrfChemWriter(comm_world, comm_write, logger, arguments.output_name, grid, date_array, pollutant_info, + rank_distribution, arguments.output_attributes, arguments.emission_summary) + else: + raise TypeError("Unknown output model '{0}'. ".format(arguments.output_model) + + "Only MONARCH, CMAQ, WRF_CHEM or DEFAULT writers are available") + + logger.write_time_log('Writer', 'select_writer', timeit.default_timer() - spent_time) + return writer + + +def get_distribution(logger, processors, shape): + """ + Calculate the process distribution for writing. + + :param logger: Logger + :type logger: Log + + :param processors: Number of writing processors. + :type processors: int + + :param shape: Complete shape of the destiny domain. + :type shape: tuple + + :return: Information of the writing process. That argument is a dictionary with the writing + process rank as key and another dictionary as value. That other dictionary contains: + - shape: Shape to write + - x_min: X minimum position to write on the full array. + - x_max: X maximum position to write on the full array. + - y_min: Y minimum position to write on the full array. + - y_max: Y maximum position to write on the full array. + - fid_min: Minimum cell ID of a flatten X Y domain. + - fid_max: Maximum cell ID of a flatten X Y domain. + + e.g. 24 time steps. 
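
# Illustrative check of the cap computed in select_writer() above: at most one
# writing process per two rows of the output grid (rounded up), and never more
# than the number of MPI processes. The numbers below are hypothetical.
n_rows = 30                 # grid.shape[2] in the real code
world_size = 48             # size of MPI.COMM_WORLD
max_procs = n_rows // 2 if n_rows % 2 == 0 else n_rows // 2 + 1   # 15
writing_processors = min(20, world_size, max_procs)               # capped to 15
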
48 vertical levels, 10 x 10 + {0: {'fid_min': 0, 'y_min': 0, 'y_max': 5, 'fid_max': 50, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}, + 1: {'fid_min': 50, 'y_min': 5, 'y_max': 10, 'fid_max': 100, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}} + :rtype rank_distribution: dict + """ + spent_time = timeit.default_timer() + fid_dist = {} + total_rows = shape[2] + + aux_rows = total_rows // processors + if total_rows % processors > 0: + aux_rows += 1 + + if aux_rows * (processors - 1) >= total_rows: + aux_rows -= 1 + + rows_sum = 0 + for proc in xrange(processors): + total_rows -= aux_rows + if total_rows < 0 or proc == processors - 1: + rows = total_rows + aux_rows + else: + rows = aux_rows + + min_fid = proc * aux_rows * shape[3] + max_fid = (proc + 1) * aux_rows * shape[3] + + fid_dist[proc] = { + 'y_min': rows_sum, + 'y_max': rows_sum + rows, + 'x_min': 0, + 'x_max': shape[3], + 'fid_min': min_fid, + 'fid_max': max_fid, + 'shape': (shape[0], shape[1], rows, shape[3]), + } + + rows_sum += rows + + logger.write_time_log('Writer', 'get_distribution', timeit.default_timer() - spent_time) + return fid_dist + + +def get_balanced_distribution(logger, processors, shape): + """ + Calculate the process distribution for writing. + + :param logger: Logger + :type logger: Log + + :param processors: Number of writing processors. + :type processors: int + + :param shape: Complete shape of the destiny domain. + :type shape: tuple + + :return: Information of the writing process. That argument is a dictionary with the writing + process rank as key and another dictionary as value. That other dictionary contains: + - shape: Shape to write + - x_min: X minimum position to write on the full array. + - x_max: X maximum position to write on the full array. + - y_min: Y minimum position to write on the full array. + - y_max: Y maximum position to write on the full array. + - fid_min: Minimum cell ID of a flatten X Y domain. + - fid_max: Maximum cell ID of a flatten X Y domain. + + e.g. 24 time steps. 48 vertical levels, 10 x 10 + {0: {'fid_min': 0, 'y_min': 0, 'y_max': 5, 'fid_max': 50, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}, + 1: {'fid_min': 50, 'y_min': 5, 'y_max': 10, 'fid_max': 100, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}} + :rtype rank_distribution: dict + """ + spent_time = timeit.default_timer() + fid_dist = {} + total_rows = shape[2] + + procs_rows = total_rows // processors + procs_rows_extended = total_rows-(procs_rows*processors) + + rows_sum = 0 + for proc in xrange(processors): + if proc < procs_rows_extended: + aux_rows = procs_rows + 1 + else: + aux_rows = procs_rows + + total_rows -= aux_rows + if total_rows < 0: + rows = total_rows + aux_rows + else: + rows = aux_rows + + min_fid = proc * aux_rows * shape[3] + max_fid = (proc + 1) * aux_rows * shape[3] + + fid_dist[proc] = { + 'y_min': rows_sum, + 'y_max': rows_sum + rows, + 'x_min': 0, + 'x_max': shape[3], + 'fid_min': min_fid, + 'fid_max': max_fid, + 'shape': (shape[0], shape[1], rows, shape[3]), + } + + rows_sum += rows + + logger.write_time_log('Writer', 'get_distribution', timeit.default_timer() - spent_time) + return fid_dist + + +class Writer(object): + def __init__(self, comm_world, comm_write, logger, netcdf_path, grid, date_array, pollutant_info, + rank_distribution, emission_summary=False): + """ + Initialise the Writer class. + + :param comm_world: Global communicator for all the calculation process + :type comm_world: MPI.COMM + + :param comm_write: Sector communicator. 
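
# Simplified, standalone re-derivation of the row split performed by
# get_distribution() (the real function handles a couple of edge cases
# slightly differently); for the documented case it reproduces the
# two-writer example dict shown above.
def split_rows(processors, shape):
    t_steps, levels, n_rows, n_cols = shape
    aux_rows = n_rows // processors + (1 if n_rows % processors else 0)
    dist, first_row = {}, 0
    for proc in range(processors):
        rows = min(aux_rows, n_rows - first_row)
        dist[proc] = {'y_min': first_row, 'y_max': first_row + rows,
                      'x_min': 0, 'x_max': n_cols,
                      'fid_min': first_row * n_cols, 'fid_max': (first_row + rows) * n_cols,
                      'shape': (t_steps, levels, rows, n_cols)}
        first_row += rows
    return dist

print(split_rows(2, (24, 48, 10, 10)))   # matches the 2-writer example above
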
+ :type comm_write: MPI.Intracomm + + :param logger: Logger + :type logger: Log + + :param netcdf_path: Path to the output NetCDF file- + :type netcdf_path: str + + :param grid: Output grid definition. + :type grid: hermesv3_bu.grids.grid.Grid + + :param date_array: Array with each time step to be calculated. + :type date_array: list of datetime.datetime + + :param pollutant_info: Information related with the output pollutants, short description, units... + :type pollutant_info: DataFrame + + :param rank_distribution: Information of the writing process. That argument is a dictionary with the writing + process rank as key and another dictionary as value. That other dictionary contains: + - shape: Shape to write + - x_min: X minimum position to write on the full array. + - x_max: X maximum position to write on the full array. + - y_min: Y minimum position to write on the full array. + - y_max: Y maximum position to write on the full array. + - fid_min: Minimum cell ID of a flatten X Y domain. + - fid_max: Maximum cell ID of a flatten X Y domain. + + e.g. 24 time steps. 48 vertical levels, 10 x 10 + {0: {'fid_min': 0, 'y_min': 0, 'y_max': 5, 'fid_max': 50, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}, + 1: {'fid_min': 50, 'y_min': 5, 'y_max': 10, 'fid_max': 100, 'shape': (24, 48, 5, 10), 'x_max': 10, + 'x_min': 0}} + :type rank_distribution: dict + """ + spent_time = timeit.default_timer() + + self.comm_world = comm_world + self.comm_write = comm_write + self.logger = logger + self.netcdf_path = netcdf_path + self.grid = grid + self.date_array = date_array + self.pollutant_info = pollutant_info + self.rank_distribution = rank_distribution + self.emission_summary = emission_summary + + if self.emission_summary and self.comm_write.Get_rank() == 0: + self.emission_summary_paths = { + 'hourly_layer_summary_path': self.netcdf_path.replace('.nc', '_summary_hourly_layer.csv'), + 'hourly_summary_path': self.netcdf_path.replace('.nc', '_summary_hourly.csv'), + 'total_summary_path': self.netcdf_path.replace('.nc', '_summary.csv') + } + else: + self.emission_summary_paths = None + + self.logger.write_time_log('Writer', '__init__', timeit.default_timer() - spent_time) + + def gather_emissions(self, emissions): + """ + Each writing process recives the emissions for a concrete region of the domain. + + Each calculation process sends a part of the emissions to each writing processor. + + :param emissions: Emissions to be split and sent to the writing processors. + :type emissions: DataFrame + + :return: The writing processors will return the emissions to write but the non writer processors will return + None. 
+ :rtype: DataFrame + """ + spent_time = timeit.default_timer() + # Sending + self.logger.write_log('Sending emissions to the writing processors.', message_level=2) + requests = [] + for w_rank, info in self.rank_distribution.iteritems(): + partial_emis = emissions.loc[(emissions.index.get_level_values(0) >= info['fid_min']) & + (emissions.index.get_level_values(0) < info['fid_max'])] + + self.logger.write_log('\tFrom {0} sending {1} to {2}'.format( + self.comm_world.Get_rank(), sys.getsizeof(partial_emis), w_rank), message_level=3) + # requests.append(self.comm_world.isend(sys.getsizeof(partial_emis), dest=w_rank, + # tag=self.comm_world.Get_rank() + MPI_TAG_CONSTANT)) + requests.append(self.comm_world.isend(partial_emis, dest=w_rank, tag=self.comm_world.Get_rank())) + + # Receiving + self.logger.write_log('Receiving emissions in the writing processors.', message_level=2) + if self.comm_world.Get_rank() in self.rank_distribution.iterkeys(): + self.logger.write_log("I'm a writing processor.", message_level=3) + data_list = [] + + self.logger.write_log("Prepared to receive", message_level=3) + for i_rank in xrange(self.comm_world.Get_size()): + # print self.rank_distribution[i_rank] + # print reduce(lambda x, y: x * y, self.rank_distribution[i_rank]['shape']) + # req = self.comm_world.irecv(source=i_rank, tag=i_rank + MPI_TAG_CONSTANT) + # data_size = req.wait() + + self.logger.write_log( + '\tFrom {0} to {1}'.format(i_rank, self.comm_world.Get_rank()), message_level=3) + req = self.comm_world.irecv(2**27, source=i_rank, tag=i_rank) + dataframe = req.wait() + data_list.append(dataframe.reset_index()) + # print "I'm Rank {0} DataList: \n {1}".format(self.comm_world.Get_rank(), data_list) + # new_emissions = pd.concat(data_list).reset_index().groupby(['FID', 'layer', 'tstep']).sum() + new_emissions = pd.concat(data_list) + new_emissions[['FID', 'layer', 'tstep']] = new_emissions[['FID', 'layer', 'tstep']].astype(np.int32) + # new_emissions.reset_index(inplace=True) + + new_emissions = new_emissions.groupby(['FID', 'layer', 'tstep']).sum() + # try: + # new_emissions = new_emissions.groupby(['FID', 'layer', 'tstep']).sum() + # except KeyError as e: + # print "I'm Rank {0} ERROR on: \n {1}".format(self.comm_world.Get_rank(), new_emissions) + # raise e + else: + new_emissions = None + self.comm_world.Barrier() + self.logger.write_log('All emissions received.', message_level=2) + + if self.emission_summary and self.comm_world.Get_rank() in self.rank_distribution.iterkeys(): + self.make_summary(new_emissions) + + self.logger.write_time_log('Writer', 'gather_emissions', timeit.default_timer() - spent_time) + + return new_emissions + + def dataframe_to_array(self, dataframe): + """ + Set the dataframe emissions to a 4D numpy array in the way taht have to be written. + + :param dataframe: Dataframe with the FID, level and time step as index and pollutant as columns. + :type dataframe: DataFrame + + :return: 4D array with the emissions to be written. 
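
# Non-MPI sketch of the reduction done once all partial emissions have been
# received in gather_emissions() above: the per-rank DataFrames are
# concatenated and summed per (FID, layer, tstep) cell. The two partial
# frames below are invented.
import pandas as pd

part_a = pd.DataFrame({'FID': [0, 1], 'layer': 0, 'tstep': 0, 'co': [1.0, 2.0]})
part_b = pd.DataFrame({'FID': [1, 2], 'layer': 0, 'tstep': 0, 'co': [0.5, 3.0]})
merged = pd.concat([part_a, part_b]).groupby(['FID', 'layer', 'tstep']).sum()
# the cell with FID == 1 ends up with co == 2.5
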
+ :rtype: numpy.array + """ + spent_time = timeit.default_timer() + var_name = dataframe.columns.values[0] + shape = self.rank_distribution[self.comm_write.Get_rank()]['shape'] + dataframe.reset_index(inplace=True) + dataframe['FID'] = dataframe['FID'] - self.rank_distribution[self.comm_write.Get_rank()]['fid_min'] + data = np.zeros((shape[0], shape[1], shape[2] * shape[3])) + + for (layer, tstep), aux_df in dataframe.groupby(['layer', 'tstep']): + data[tstep, layer, aux_df['FID']] = aux_df[var_name] + self.logger.write_time_log('Writer', 'dataframe_to_array', timeit.default_timer() - spent_time) + + return data.reshape(shape) + + def write(self, emissions): + """ + Do all the process to write the emissions. + + :param emissions: Emissions to be written. + :type emissions: DataFrame + + :return: True if everything finish OK. + :rtype: bool + """ + spent_time = timeit.default_timer() + emissions = self.unit_change(emissions) + emissions = self.gather_emissions(emissions) + if self.comm_world.Get_rank() in self.rank_distribution.iterkeys(): + self.write_netcdf(emissions) + + self.comm_world.Barrier() + self.logger.write_time_log('Writer', 'write', timeit.default_timer() - spent_time) + + return True + + def unit_change(self, emissions): + """ + Implemented on the inner classes + + :rtype: DataFrame + """ + pass + + def write_netcdf(self, emissions): + """ + Implemented on the inner classes + """ + pass + + def make_summary(self, emissions): + """ + Create the files with the summary of the emissions. + + It will create 3 files: + - Total emissions per pollutant + - Total emissions per pollutant and hour + - Total emissions per pollutant, hour and layer + + :param emissions: Emissions + :type emissions: DataFrame + + :return: True if everything goes OK + :rtype: bool + """ + spent_time = timeit.default_timer() + + summary = emissions.groupby(['tstep', 'layer']).sum().reset_index() + + summary = self.comm_write.gather(summary, root=0) + + if self.comm_write.Get_rank() == 0: + summary = pd.concat(summary) + summary = summary.groupby(['tstep', 'layer']).sum() + summary.to_csv(self.emission_summary_paths['hourly_layer_summary_path']) + summary.reset_index(inplace=True) + summary.drop(columns=['layer'], inplace=True) + summary.groupby('tstep').sum().to_csv(self.emission_summary_paths['hourly_summary_path']) + summary.drop(columns=['tstep'], inplace=True) + summary.sum().to_csv(self.emission_summary_paths['total_summary_path']) + self.logger.write_time_log('Writer', 'make_summary', timeit.default_timer() - spent_time) diff --git a/run_test.py b/run_test.py new file mode 100755 index 0000000000000000000000000000000000000000..f12df676af52dbf9353c11cd2f40e8f84f625a96 --- /dev/null +++ b/run_test.py @@ -0,0 +1,24 @@ +# coding=utf-8 +"""Script to run the tests for HERMESv3_BU and generate the code coverage report""" + +import os +import sys +import pytest + + +work_path = os.path.abspath(os.path.join(os.path.dirname(__file__))) +os.chdir(work_path) +print(work_path) + + +version = sys.version_info[0] +report_dir = 'tests/report/python{}'.format(version) +errno = pytest.main([ + 'tests', + '--ignore=tests/report', + '--cov=hermesv3_bu', + '--cov-report=term', + '--cov-report=html:{}/coverage_html'.format(report_dir), + '--cov-report=xml:{}/coverage.xml'.format(report_dir), +]) +sys.exit(errno) diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 05a1a9435e035e33f8b71bafaaf8d206a9b34644..1472ab30ddee0cbb7fa4837b6301d22e58aefb58 --- a/setup.py +++ b/setup.py @@ -1,7 +1,5 @@ #!/usr/bin/env 
python -# TODO add license - from setuptools import find_packages from setuptools import setup from hermesv3_bu import __version__ @@ -14,7 +12,7 @@ with open("README.md", "r") as f: long_description = f.read() setup( - name='hermesv3_gr', + name='hermesv3_bu', # license='', # platforms=['GNU/Linux Debian'], version=version, @@ -23,10 +21,9 @@ setup( long_description_content_type="text/markdown", author='Carles Tena Medina', author_email='carles.tena@bsc.es', - url='https://earth.bsc.es/gitlab/es/hermesv3_gr', + url='https://earth.bsc.es/gitlab/es/hermesv3_bu', keywords=['emissions', 'cmaq', 'monarch', 'wrf-chem', 'atmospheric composition', 'air quality', 'earth science'], - # setup_requires=['pyproj'], install_requires=[ 'numpy', 'netCDF4>=1.3.1', @@ -43,6 +40,8 @@ setup( 'timezonefinder', 'mpi4py', 'pytest', + 'shapely', + 'rasterio', ], packages=find_packages(), classifiers=[ @@ -60,7 +59,7 @@ setup( entry_points={ 'console_scripts': [ - 'hermesv3_bu = hermesv3_gr.hermes:run', + 'hermesv3_bu = hermesv3_bu.hermes:run', ], }, ) \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/unit/test_lint.py b/tests/unit/test_lint.py new file mode 100755 index 0000000000000000000000000000000000000000..f7d9918aea4cd6758b8ebb05ac31cd668c1eccd4 --- /dev/null +++ b/tests/unit/test_lint.py @@ -0,0 +1,33 @@ +""" Lint tests """ +import os +import unittest + +import pycodestyle # formerly known as pep8 + + +class TestLint(unittest.TestCase): + + def test_pep8_conformance(self): + """Test that we conform to PEP-8.""" + + check_paths = [ + 'hermesv3_bu', + 'tests', + ] + exclude_paths = [ + + ] + + print("PEP8 check of directories: {}\n".format(', '.join(check_paths))) + + # Get paths wrt package root + package_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) + for paths in (check_paths, exclude_paths): + for i, path in enumerate(paths): + paths[i] = os.path.join(package_root, path) + + style = pycodestyle.StyleGuide() + style.options.exclude.extend(exclude_paths) + style.options.max_line_length = 120 + + self.assertEqual(style.check_files(check_paths).total_errors, 0)