diff --git a/conf/hermes.conf b/conf/hermes.conf
index a2bd0a3acdeb2084405b8743f04a5f50f5c76c5e..5a5c8a92976d35207cac2ace040423fde003c649 100755
--- a/conf/hermes.conf
+++ b/conf/hermes.conf
@@ -2,14 +2,14 @@
 log_level = 3
 input_dir = /home/Earth/ctena/Models/hermesv3_bu_data
 data_path = /esarchive/recon
-output_dir = /scratch/Earth/HERMESv3_BU_OUT
-output_name = HERMES_.nc
+output_dir = /scratch/Earth/HERMESv3_BU_OUT/py3
+output_name = HERMES__py3_traffic_cmaq.nc
 emission_summary = 0
 start_date = 2016/11/29 00:00:00
 # ----- end_date = start_date [DEFAULT] -----
 # end_date = 2010/01/01 00:00:00
 output_timestep_num = 25
-auxiliary_files_path = /scratch/Earth/HERMESv3_BU_aux/_
+auxiliary_files_path = /scratch/Earth/HERMESv3_BU_aux/py3/_
 erase_auxiliary_files = 0
@@ -18,7 +18,7 @@ erase_auxiliary_files = 0
 # domain_type=[lcc, rotated, mercator, regular]
 domain_type = lcc
 # output_type=[MONARCH, CMAQ, WRF_CHEM, DEFAULT]
-output_model = DEFAULT
+output_model = CMAQ
 output_attributes = /writing/global_attributes_WRF-Chem.csv
 vertical_description = /profiles/vertical/MONARCH_Global_48layers_vertical_description.csv
 #vertical_description = /profiles/vertical/CMAQ_15layers_vertical_description.csv
@@ -126,7 +126,7 @@ agricultural_machinery_processors = 0
 residential_processors = 0
 recreational_boats_processors = 0
 point_sources_processors = 0
-traffic_processors = 0
+traffic_processors = 1
 traffic_area_processors = 0
@@ -136,7 +136,7 @@ nut_shapefile_ccaa = /Shapefiles/CCAA/ES_CCAA.shp
 population_density_map = /jrc/ghsl/original_files/GHS_POP_GPW42015_GLOBE_R2015A_54009_1k_v1_0.tif
 
 [SPECIATION DATA]
-speciation_map = /profiles/speciation/map_base.csv
+speciation_map = /profiles/speciation/map_cmaq_cb05_aero5.csv
 molecular_weights = /profiles/speciation/MolecularWeights.csv
 
 [METEO PATHS]
@@ -160,7 +160,7 @@ layer_thickness_dir = /esarchive/exp/monarch/a1wd/regional/hourly/layer_thicknes
 
 [AVIATION SECTOR]
 # With 'hc' is calculated 'nmvoc' and 'ch4'
 aviation_source_pollutants = nox_no2, co, hc, so2, pm10, pm25, co2
-# airport_list = LEBL
+# airport_list =
 # plane_list =
 airport_shapefile_path = /aviation/Airports.shp
 airport_runways_shapefile_path = /aviation/Runways.shp
@@ -251,7 +251,9 @@ crop_machinery_speciation_profiles = /profiles/speciation/agricultura
 crop_machinery_by_nut = /agriculture/agricultural_machinery/crops_ha_prov_2017.csv
 
 [RESIDENTIAL]
-fuel_list = B_res, B_com
+fuel_list = HD_res, LPG_res, NG_res, HD_com, LPG_com, NG_com, B_res, B_com
+# fuel_list = B_res, B_com
+# fuel_list = HD_res, LPG_res, NG_res, HD_com, LPG_com, NG_com
 residential_source_pollutants = nox_no2, so2, co, nh3, pm10, pm25, nmvoc
 population_type_map = /jrc/ghsl/original_files/GHS_SMOD_POP2015_GLOBE_R2016A_54009_1k_v1_0.tif
 population_type_by_ccaa = /residential/pop_type_ccaa.csv
diff --git a/environment.yml b/environment.yml
index fc71b53fb3264847339c3d43aba843e8614b2232..584e815d9fbd3e5843f104f8f1f0bbab6c8866e2 100755
--- a/environment.yml
+++ b/environment.yml
@@ -23,5 +23,3 @@ dependencies:
     - pytest-cov
     - pycodestyle
     - shapely
-    - pip:
-        - holidays
diff --git a/hermesv3_bu/config/config.py b/hermesv3_bu/config/config.py
index 043ee0e4acc824eb254d1593529bcbe6a1e5eb85..3eab46da56fd30a8912695f25840f311567b9bbc 100755
--- a/hermesv3_bu/config/config.py
+++ b/hermesv3_bu/config/config.py
@@ -534,10 +534,9 @@ class Config(ArgParser):
         elif str_bool in false_options:
             return False
         else:
-            print 'WARNING: Boolean value not contemplated use {0} for True values and {1} for the False ones'.format(
-                true_options,
-                false_options
-            )
-            print '/t Using False as default'
+            print('WARNING: Boolean value not recognised; use {0} for True values and {1} for False ones.'.format(
+                true_options, false_options))
+            print('\tUsing False as default')
             return False

     def _parse_start_date(self, str_date):
@@ -566,7 +565,7 @@ class Config(ArgParser):
                     date = datetime.strptime(str_date, date_format)
                     break
                 except ValueError as e:
-                    if e.message == 'day is out of range for month':
+                    if str(e) == 'day is out of range for month':
                         raise ValueError(e)

         if date is None:
diff --git a/hermesv3_bu/grids/grid.py b/hermesv3_bu/grids/grid.py
index 4269f20c660ab71f7842a0ea200d0ea5d62c3b6b..3b85db3eebec9f5eca63d60665d41e5c62f5f153 100755
--- a/hermesv3_bu/grids/grid.py
+++ b/hermesv3_bu/grids/grid.py
@@ -27,28 +27,28 @@ def select_grid(comm, logger, arguments):
         if arguments.domain_type == 'regular':
             from hermesv3_bu.grids.grid_latlon import LatLonGrid
             grid = LatLonGrid(
-                comm, logger, arguments.auxiliary_files_path, arguments.output_timestep_num,
+                logger, arguments.auxiliary_files_path, arguments.output_timestep_num,
                 arguments.vertical_description, arguments.inc_lat, arguments.inc_lon, arguments.lat_orig,
                 arguments.lon_orig, arguments.n_lat, arguments.n_lon)

         elif arguments.domain_type == 'lcc':
             from hermesv3_bu.grids.grid_lcc import LccGrid
             grid = LccGrid(
-                comm, logger, arguments.auxiliary_files_path, arguments.output_timestep_num,
+                logger, arguments.auxiliary_files_path, arguments.output_timestep_num,
                 arguments.vertical_description, arguments.lat_1, arguments.lat_2, arguments.lon_0, arguments.lat_0,
                 arguments.nx, arguments.ny, arguments.inc_x, arguments.inc_y, arguments.x_0, arguments.y_0)

         elif arguments.domain_type == 'rotated':
             from hermesv3_bu.grids.grid_rotated import RotatedGrid
             grid = RotatedGrid(
-                comm, logger, arguments.auxiliary_files_path, arguments.output_timestep_num,
+                logger, arguments.auxiliary_files_path, arguments.output_timestep_num,
                 arguments.vertical_description, arguments.centre_lat, arguments.centre_lon,
                 arguments.west_boundary, arguments.south_boundary, arguments.inc_rlat, arguments.inc_rlon)

         elif arguments.domain_type == 'mercator':
             from hermesv3_bu.grids.grid_mercator import MercatorGrid
             grid = MercatorGrid(
-                comm, logger, arguments.auxiliary_files_path, arguments.output_timestep_num,
+                logger, arguments.auxiliary_files_path, arguments.output_timestep_num,
                 arguments.vertical_description, arguments.lat_ts, arguments.lon_0, arguments.nx, arguments.ny,
                 arguments.inc_x, arguments.inc_y, arguments.x_0, arguments.y_0)
@@ -58,13 +58,14 @@ def select_grid(comm, logger, arguments):
             grid = None
     grid = comm.bcast(grid, root=0)
+    logger.write_time_log('Grid', 'select_grid', timeit.default_timer() - spent_time)
     return grid


 class Grid(object):

-    def __init__(self, comm, logger, attributes, auxiliary_path, vertical_description_path):
+    def __init__(self, logger, attributes, auxiliary_path, vertical_description_path):
         """
         Initialise the Grid class

         :param attributes: Grid attributes
         :type attributes: dict

         :param auxiliary_path: Path to the folder to store all the needed auxiliary files
         :type auxiliary_path: str

         :param vertical_description_path: Path to the file that describes the vertical resolution
         :type vertical_description_path: str
         """
         spent_time = timeit.default_timer()
-        self.comm = comm
         self.logger = logger
         self.logger.write_log('\tGrid specifications: {0}'.format(attributes), 3)
         self.attributes = attributes
@@ -181,7 +181,6 @@ class Grid(object):
         :rtype: GeoDataFrame
         """
         import geopandas as gpd
-        import pandas as pd
         from shapely.geometry import Polygon
         spent_time = timeit.default_timer()
@@ -217,38 +216,24 @@ class Grid(object):
             aux_b_lats = y.reshape((y.shape[0] * y.shape[1], y.shape[2]))
             aux_b_lons = x.reshape((x.shape[0] * x.shape[1], x.shape[2]))
-
+            gdf = gpd.GeoDataFrame(index=range(aux_b_lons.shape[0]), crs={'init': 'epsg:4326'})
+            gdf['geometry'] = None

             # Create one dataframe with 8 columns, 4 points with two coordinates each one
-            df_lats = pd.DataFrame(aux_b_lats, columns=['b_lat_1', 'b_lat_2', 'b_lat_3', 'b_lat_4'])
-            df_lons = pd.DataFrame(aux_b_lons, columns=['b_lon_1', 'b_lon_2', 'b_lon_3', 'b_lon_4'])
-            df = pd.concat([df_lats, df_lons], axis=1)
-
-            # Substituate 8 columns by 4 with the two coordinates
-            df['p1'] = zip(df.b_lon_1, df.b_lat_1)
-            del df['b_lat_1'], df['b_lon_1']
-            df['p2'] = zip(df.b_lon_2, df.b_lat_2)
-            del df['b_lat_2'], df['b_lon_2']
-            df['p3'] = zip(df.b_lon_3, df.b_lat_3)
-            del df['b_lat_3'], df['b_lon_3']
-            df['p4'] = zip(df.b_lon_4, df.b_lat_4)
-            del df['b_lat_4'], df['b_lon_4']
-
-            # Make a list of list of tuples
-            list_points = df.values
-            del df['p1'], df['p2'], df['p3'], df['p4']
-
-            # List of polygons from the list of points
-            geometry = [Polygon(list(points)) for points in list_points]
-
-            gdf = gpd.GeoDataFrame(index=df.index, crs={'init': 'epsg:4326'}, geometry=geometry)
-            gdf = gdf.to_crs(self.attributes['crs'])
+            for i in range(aux_b_lons.shape[0]):
+                gdf.loc[i, 'geometry'] = Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]),
+                                                  (aux_b_lons[i, 1], aux_b_lats[i, 1]),
+                                                  (aux_b_lons[i, 2], aux_b_lats[i, 2]),
+                                                  (aux_b_lons[i, 3], aux_b_lats[i, 3]),
+                                                  (aux_b_lons[i, 0], aux_b_lats[i, 0])])
+
+            gdf.to_crs(self.attributes['crs'], inplace=True)

             gdf['FID'] = gdf.index
             gdf.to_file(self.shapefile_path)
         else:
             gdf = gpd.read_file(self.shapefile_path)
-            # gdf.set_index('FID', inplace=True, drop=False)
+            gdf.set_index('FID', inplace=True)

         self.logger.write_time_log('Grid', 'create_shapefile', timeit.default_timer() - spent_time, 2)
         return gdf
diff --git a/hermesv3_bu/grids/grid_latlon.py b/hermesv3_bu/grids/grid_latlon.py
index 8b3def85528278cfdf111b300325e19d1d6706eb..8e1cd95a59c9132c9adca24efda28e1d70951150 100755
--- a/hermesv3_bu/grids/grid_latlon.py
+++ b/hermesv3_bu/grids/grid_latlon.py
@@ -29,7 +29,7 @@ from hermesv3_bu.logger.log import Log

 class LatLonGrid(Grid):

-    def __init__(self, comm, logger, auxiliary_path, tstep_num, vertical_description_path, inc_lat, inc_lon, lat_orig,
+    def __init__(self, logger, auxiliary_path, tstep_num, vertical_description_path, inc_lat, inc_lon, lat_orig,
                  lon_orig, n_lat, n_lon):
         """
         Regional regular lat-lon grid object that contains all the information to do a global output.
@@ -70,7 +70,7 @@ class LatLonGrid(Grid):
         attributes = {'inc_lat': inc_lat, 'inc_lon': inc_lon, 'lat_orig': lat_orig, 'lon_orig': lon_orig,
                       'n_lat': n_lat, 'n_lon': n_lon, 'crs': {'init': 'epsg:4326'}}
         # Initialize the class using parent
-        super(LatLonGrid, self).__init__(comm, logger, attributes, auxiliary_path, vertical_description_path)
+        super(LatLonGrid, self).__init__(logger, attributes, auxiliary_path, vertical_description_path)

         self.shape = (tstep_num, len(self.vertical_desctiption), n_lat, n_lon)
diff --git a/hermesv3_bu/grids/grid_lcc.py b/hermesv3_bu/grids/grid_lcc.py
index c5ae6a0ec583988c461209424f8db613a289f3b6..ee6fdeebefbc578a8ddc40b896655d57904ca4db 100755
--- a/hermesv3_bu/grids/grid_lcc.py
+++ b/hermesv3_bu/grids/grid_lcc.py
@@ -4,14 +4,14 @@ import os
 import timeit
 import numpy as np
 from pyproj import Proj
-from grid import Grid
+from hermesv3_bu.grids.grid import Grid
 from hermesv3_bu.logger.log import Log


 class LccGrid(Grid):

-    def __init__(self, comm, logger, auxiliary_path, tstep_num, vertical_description_path, lat_1, lat_2, lon_0, lat_0,
+    def __init__(self, logger, auxiliary_path, tstep_num, vertical_description_path, lat_1, lat_2, lon_0, lat_0,
                  nx, ny, inc_x, inc_y, x_0, y_0, earth_radius=6370000.000):
         """
         Lambert Conformal Conic (LCC) grid object that contains all the information to do a lcc output.
@@ -77,7 +77,7 @@ class LccGrid(Grid):
                           lat_1, lat_2, lat_0, lon_0, 0, 0) + "+datum=WGS84 +units=m"}

         # Initialises with parent class
-        super(LccGrid, self).__init__(comm, logger, attributes, auxiliary_path, vertical_description_path)
+        super(LccGrid, self).__init__(logger, attributes, auxiliary_path, vertical_description_path)

         self.shape = (tstep_num, len(self.vertical_desctiption), ny, nx)
         self.logger.write_time_log('LccGrid', '__init__', timeit.default_timer() - spent_time)
diff --git a/hermesv3_bu/grids/grid_mercator.py b/hermesv3_bu/grids/grid_mercator.py
index 2c57d6536464423b42a83a2016e022747be1fd73..f03ccd74cf025b8521614350bc49a9ab05144593 100755
--- a/hermesv3_bu/grids/grid_mercator.py
+++ b/hermesv3_bu/grids/grid_mercator.py
@@ -10,7 +10,7 @@ from hermesv3_bu.logger.log import Log

 class MercatorGrid(Grid):

-    def __init__(self, comm, logger, auxiliary_path, tstep_num, vertical_description_path, lat_ts, lon_0, nx, ny, inc_x,
+    def __init__(self, logger, auxiliary_path, tstep_num, vertical_description_path, lat_ts, lon_0, nx, ny, inc_x,
                  inc_y, x_0, y_0, earth_radius=6370000.000):
         """
         Mercator grid object that contains all the information to do a mercator output.
@@ -66,7 +66,7 @@ class MercatorGrid(Grid):
         self.y = None

         # Initialises with parent class
-        super(MercatorGrid, self).__init__(comm, logger, attributes, auxiliary_path, vertical_description_path)
+        super(MercatorGrid, self).__init__(logger, attributes, auxiliary_path, vertical_description_path)

         self.shape = (tstep_num, len(self.vertical_desctiption), ny, nx)
         self.logger.write_time_log('MercatorGrid', '__init__', timeit.default_timer() - spent_time, 3)
diff --git a/hermesv3_bu/grids/grid_rotated.py b/hermesv3_bu/grids/grid_rotated.py
index 3ddf526237621acfafdb02d31ed345ae8a417cfa..f8b1a1de6a771cf01f5e89ffae8e936cbb5d0d6d 100755
--- a/hermesv3_bu/grids/grid_rotated.py
+++ b/hermesv3_bu/grids/grid_rotated.py
@@ -10,7 +10,7 @@ from hermesv3_bu.logger.log import Log

 class RotatedGrid(Grid):
-    def __init__(self, comm, logger, auxiliary_path, tstep_num, vertical_description_path, centre_lat, centre_lon,
+    def __init__(self, logger, auxiliary_path, tstep_num, vertical_description_path, centre_lat, centre_lon,
                  west_boundary, south_boundary, inc_rlat, inc_rlon):
         """

@@ -41,9 +41,9 @@ class RotatedGrid(Grid):
                       'n_lon': int((abs(west_boundary) / inc_rlon) * 2 + 1), 'crs': {'init': 'epsg:4326'}}

         # Initialises with parent class
-        super(RotatedGrid, self).__init__(comm, logger, attributes, auxiliary_path, vertical_description_path)
+        super(RotatedGrid, self).__init__(logger, attributes, auxiliary_path, vertical_description_path)

-        self.shape = (tstep_num, len(self.vertical_desctiption), len(self.rlat), len(self.rlon))
+        self.shape = (tstep_num, len(self.vertical_desctiption), len(attributes['rlat']), len(attributes['rlon']))
         self.logger.write_time_log('RotatedGrid', '__init__', timeit.default_timer() - spent_time, 3)

     def create_regular_rotated(self):
@@ -134,7 +134,6 @@ class RotatedGrid(Grid):
         #     sph = 1.
         # if sph < -1.:
         #     sph = -1.
-        # print type(sph)
        sph[sph > 1.] = 1.
        sph[sph < -1.] = -1.
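Editor's note: the rewritten `Grid.create_shapefile` in the grid.py hunk above builds one shapely `Polygon` per grid cell by assigning into `gdf.loc[i, 'geometry']` inside a loop, which is correct but slow on large grids because every `.loc` write touches the whole GeoDataFrame. A minimal sketch of an equivalent construction that builds the geometry list first — assuming, as in the patch, that `aux_b_lons`/`aux_b_lats` are `(n_cells, 4)` arrays of cell-corner coordinates; the function name and the test values are illustrative only, not part of the patch:

```python
import numpy as np
import geopandas as gpd
from shapely.geometry import Polygon


def cells_to_shapefile_gdf(aux_b_lons, aux_b_lats):
    # Build the closed ring of each cell once, in a list comprehension,
    # instead of one .loc assignment per cell.
    geometry = [
        Polygon([(lon[0], lat[0]), (lon[1], lat[1]), (lon[2], lat[2]),
                 (lon[3], lat[3]), (lon[0], lat[0])])
        for lon, lat in zip(aux_b_lons, aux_b_lats)]
    gdf = gpd.GeoDataFrame(geometry=geometry, crs={'init': 'epsg:4326'})
    gdf['FID'] = gdf.index
    return gdf


# Tiny smoke test with two made-up 1-degree cells:
lons = np.array([[0., 1., 1., 0.], [1., 2., 2., 1.]])
lats = np.array([[40., 40., 41., 41.], [40., 40., 41., 41.]])
print(cells_to_shapefile_gdf(lons, lats))
```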
diff --git a/hermesv3_bu/hermes.py b/hermesv3_bu/hermes.py
index 8a8015ccccfd32e5c69b1064c41bf679d5ce6ada..d64e757d21007b2b3181063884d74cf7b6a3352f 100755
--- a/hermesv3_bu/hermes.py
+++ b/hermesv3_bu/hermes.py
@@ -23,13 +23,13 @@ class Hermes(object):
         self.comm = MPI.COMM_WORLD

         self.arguments = config.arguments
-        self.logger = Log(self.comm, self.arguments)
+        self.logger = Log(self.arguments)
         self.logger.write_log('====== Starting HERMESv3_BU simulation =====')
         self.grid = select_grid(self.comm, self.logger, self.arguments)
         self.clip = select_clip(self.comm, self.logger, self.arguments.auxiliary_files_path, self.arguments.clipping,
                                 self.grid)
         self.date_array = [self.arguments.start_date + timedelta(hours=hour) for hour in
-                           xrange(self.arguments.output_timestep_num)]
+                           range(self.arguments.output_timestep_num)]
         self.logger.write_log('Dates to simulate: {0}'.format(
             [aux_date.strftime("%Y/%m/%d, %H:%M:%S") for aux_date in self.date_array]), message_level=2)
diff --git a/hermesv3_bu/io_server/io_netcdf.py b/hermesv3_bu/io_server/io_netcdf.py
index ad47f657bdd50119963e6acad5956f548782904d..01d0e837aeeaa354a8f58df00b50f2392a2b4a1a 100755
--- a/hermesv3_bu/io_server/io_netcdf.py
+++ b/hermesv3_bu/io_server/io_netcdf.py
@@ -229,7 +229,7 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l
             netcdf.createDimension('lat', center_latitudes.shape[0])
             lat_dim = ('lon', 'lat', )
         else:
-            print 'ERROR: Latitudes must be on a 1D or 2D array instead of {0}'.format(len(center_latitudes.shape))
+            print('ERROR: Latitudes must be a 1D or 2D array, not {0}D'.format(len(center_latitudes.shape)))
             sys.exit(1)

         # Longitude
@@ -240,21 +240,21 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l
             netcdf.createDimension('lon', center_longitudes.shape[1])
             lon_dim = ('lon', 'lat', )
         else:
-            print 'ERROR: Longitudes must be on a 1D or 2D array instead of {0}'.format(len(center_longitudes.shape))
+            print('ERROR: Longitudes must be a 1D or 2D array, not {0}D'.format(len(center_longitudes.shape)))
             sys.exit(1)
     elif rotated:
         var_dim = ('rlat', 'rlon',)

         # Rotated Latitude
         if rotated_lats is None:
-            print 'ERROR: For rotated grids is needed the rotated latitudes.'
+            print('ERROR: Rotated grids require the rotated latitudes.')
             sys.exit(1)
         netcdf.createDimension('rlat', len(rotated_lats))
         lat_dim = ('rlat', 'rlon',)

         # Rotated Longitude
         if rotated_lons is None:
-            print 'ERROR: For rotated grids is needed the rotated longitudes.'
+            print('ERROR: Rotated grids require the rotated longitudes.')
             sys.exit(1)
         netcdf.createDimension('rlon', len(rotated_lons))
         lon_dim = ('rlat', 'rlon',)
@@ -297,7 +297,6 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l
     else:
         time = netcdf.createVariable('time', 'd', ('time',), zlib=True)
         u = Unit('hours')
-        # print u.offset_by_time(encode_time(date.year, date.month, date.day, date.hour, date.minute, date.second))
         # Unit('hour since 1970-01-01 00:00:00.0000000 UTC')
         time.units = str(u.offset_by_time(encode_time(date.year, date.month, date.day, date.hour, date.minute,
                                                       date.second)))
@@ -317,7 +316,6 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l
     if boundary_latitudes is not None:
         lats.bounds = "lat_bnds"
         lat_bnds = netcdf.createVariable('lat_bnds', 'f', lat_dim + ('nv',), zlib=True)
-        # print lat_bnds[:].shape, boundary_latitudes.shape
         lat_bnds[:] = boundary_latitudes

     # Longitude
@@ -327,7 +325,6 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l
     lons.axis = "X"
     lons.long_name = "longitude coordinate"
     lons.standard_name = "longitude"
-    # print 'lons:', lons[:].shape, center_longitudes.shape
     lons[:] = center_longitudes
     if boundary_longitudes is not None:
         lons.bounds = "lon_bnds"
@@ -375,7 +372,6 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l
         var = netcdf.createVariable('aux_var', 'f', ('time',) + var_dim, zlib=True)
         var[:] = 0
     for variable in data_list:
-        # print ('time',) + var_dim
         var = netcdf.createVariable(variable['name'], 'f', ('time',) + var_dim, zlib=True)
         var.units = Unit(variable['units']).symbol
         if 'long_name' in variable:
@@ -398,7 +394,7 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l
         try:
             var[:] = variable['data']
         except ValueError:
-            print 'VAR ERROR, netcdf shape: {0}, variable shape: {1}'.format(var[:].shape, variable['data'].shape)
+            print('VAR ERROR, netcdf shape: {0}, variable shape: {1}'.format(var[:].shape, variable['data'].shape))

     # Grid mapping
     if regular_latlon:
@@ -433,7 +429,6 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l
         c_area.long_name = "area of the grid cell"
         c_area.standard_name = "cell_area"
         c_area.units = Unit("m2").symbol
-        # print c_area[:].shape, cell_area.shape
         c_area[:] = cell_area

     if global_attributes is not None:
diff --git a/hermesv3_bu/io_server/io_raster.py b/hermesv3_bu/io_server/io_raster.py
index 73dc937a6b1b199c87ef2679efda14f8443708c9..030c8b82c6852ee4c8a85f5e81581b002d999d2d 100755
--- a/hermesv3_bu/io_server/io_raster.py
+++ b/hermesv3_bu/io_server/io_raster.py
@@ -198,6 +198,78 @@ class IoRaster(IoServer):

         return gdf

+    def to_shapefile_serie_by_cell(self, raster_path, out_path=None, write=False, crs=None, nodata=0):
+        """
+        Create one vector polygon per raster cell.
+
+        :param raster_path: Path to the input raster.
+        :param out_path: Path of the shapefile to write or read.
+        :param write: Indicates if the resulting shapefile has to be written to disk.
+        :param crs: Destination coordinate reference system.
+        :param nodata: Cell value to discard.
+        :return: GeoDataFrame with one cell polygon per row.
+        """
+
+        if out_path is None or not os.path.exists(out_path):
+            ds = rasterio.open(raster_path)
+
+            grid_info = ds.transform
+            # TODO: remove once the new rasterio version is installed
+            if rasterio.__version__ == '0.36.0':
+                lons = np.arange(ds.width) * grid_info[1] + grid_info[0]
+                lats = np.arange(ds.height) * grid_info[5] + grid_info[3]
+            elif rasterio.__version__ == '1.0.21':
+                lons = np.arange(ds.width) * grid_info[0] + grid_info[2]
+                lats = np.arange(ds.height) * grid_info[4] + grid_info[5]
+            else:
+                lons = np.arange(ds.width) * grid_info[0] + grid_info[2]
+                lats = np.arange(ds.height) * grid_info[4] + grid_info[5]
+
+            # 1D to 2D
+            c_lats = np.array([lats] * len(lons)).T.flatten()
+            c_lons = np.array([lons] * len(lats)).flatten()
+
+            del lons, lats
+            if rasterio.__version__ == '0.36.0':
+                b_lons = self.create_bounds(c_lons, grid_info[1], number_vertices=4) + grid_info[1] / 2
+                b_lats = self.create_bounds(c_lats, grid_info[1], number_vertices=4, inverse=True) + grid_info[5] / 2
+            elif rasterio.__version__ == '1.0.21':
+                b_lons = self.create_bounds(c_lons, grid_info[0], number_vertices=4) + grid_info[0] / 2
+                b_lats = self.create_bounds(c_lats, grid_info[4], number_vertices=4, inverse=True) + grid_info[4] / 2
+            else:
+                b_lons = self.create_bounds(c_lons, grid_info[0], number_vertices=4) + grid_info[0] / 2
+                b_lats = self.create_bounds(c_lats, grid_info[4], number_vertices=4, inverse=True) + grid_info[4] / 2
+
+            b_lats = b_lats.reshape((b_lats.shape[1], b_lats.shape[2]))
+            b_lons = b_lons.reshape((b_lons.shape[1], b_lons.shape[2]))
+
+            gdf = gpd.GeoDataFrame(ds.read(1).flatten(), columns=['data'], index=range(b_lons.shape[0]), crs=ds.crs)
+            gdf['geometry'] = None
+
+            for i in range(b_lons.shape[0]):
+                gdf.loc[i, 'geometry'] = Polygon([(b_lons[i, 0], b_lats[i, 0]),
+                                                  (b_lons[i, 1], b_lats[i, 1]),
+                                                  (b_lons[i, 2], b_lats[i, 2]),
+                                                  (b_lons[i, 3], b_lats[i, 3]),
+                                                  (b_lons[i, 0], b_lats[i, 0])])
+
+            gdf['CELL_ID'] = gdf.index
+
+            gdf = gdf[gdf['data'] != nodata]
+
+            if crs is not None:
+                gdf = gdf.to_crs(crs)
+
+            if write:
+                if not os.path.exists(os.path.dirname(out_path)):
+                    os.makedirs(os.path.dirname(out_path))
+                gdf.to_file(out_path)
+
+        else:
+            gdf = gpd.read_file(out_path)
+
+        return gdf
+
     def to_shapefile_serie(self, raster_path, out_path=None, write=False, crs=None, nodata=0):
         """

@@ -214,6 +286,7 @@ class IoRaster(IoServer):
         from rasterio.features import shapes
         mask = None
         src = rasterio.open(raster_path)
+
         image = src.read(1)  # first band
         image = image.astype(np.float32)
         geoms = (
@@ -222,10 +295,14 @@ class IoRaster(IoServer):

         gdf = gpd.GeoDataFrame.from_features(geoms)
-        gdf.loc[:, 'CELL_ID'] = xrange(len(gdf))
+        gdf.loc[:, 'CELL_ID'] = range(len(gdf))

         gdf = gdf[gdf['data'] != nodata]
-        gdf.crs = src.crs
+        # Workaround: geopandas' to_crs flips lat and lon when the CRS is given in the non-dict (string) form
+        if src.crs == 'EPSG:4326':
+            gdf.crs = {'init': 'epsg:4326'}
+        else:
+            gdf.crs = src.crs

         if crs is not None:
             gdf = gdf.to_crs(crs)
@@ -237,5 +314,5 @@ class IoRaster(IoServer):

         else:
             gdf = gpd.read_file(out_path)
-
+        gdf.set_index('CELL_ID', inplace=True)
         return gdf
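Editor's note: the `to_shapefile_serie` change above pins the CRS in dict form before reprojecting, because (as the patch's own comment reports) the geopandas/pyproj versions targeted here could swap latitude and longitude in `to_crs()` when the CRS was assigned as a plain string. A minimal sketch of the pattern; the file path is illustrative only:

```python
import geopandas as gpd

gdf = gpd.read_file('cells.shp')  # hypothetical input shapefile

# Old-style dict CRS instead of the 'EPSG:4326' string form, to avoid the
# axis-order (lat/lon flip) issue with the library versions this patch targets:
gdf.crs = {'init': 'epsg:4326'}
projected = gdf.to_crs({'init': 'epsg:3857'})
print(projected.crs)
```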
diff --git a/hermesv3_bu/logger/log.py b/hermesv3_bu/logger/log.py
index 5c1b3caf389744f52d9bda560a987be495131bc3..919c8f40b1599aaeb9c7e3c14b8e4ff9b079db08 100644
--- a/hermesv3_bu/logger/log.py
+++ b/hermesv3_bu/logger/log.py
@@ -4,22 +4,21 @@ import os

 import numpy as np
 import pandas as pd
+from mpi4py import MPI
+comm = MPI.COMM_WORLD
+

 class Log(object):
-    def __init__(self, comm, arguments, log_refresh=1, time_log_refresh=0):
+    def __init__(self, arguments, log_refresh=1, time_log_refresh=0):
         """
         Initialise the Log class.

-        :param comm: MPI communicator
-
         :param arguments: Complete argument NameSpace.
         :type arguments: NameSpace

         :param log_refresh:
         :param time_log_refresh:
         """
-        self.comm = comm
-
         self.refresh_rate = (log_refresh, time_log_refresh)
         self.log_refresh = self.refresh_rate[0]
         self.time_log_refresh = self.refresh_rate[1]
@@ -36,7 +35,7 @@ class Log(object):
         else:
             if os.path.exists(self.time_log_path):
                 os.remove(self.time_log_path)
-            self.time_log = open(self.time_log_path, mode='w')
+            # self.time_log = open(self.time_log_path, mode='w')
         else:
             # Time log only writed by master process
             self.time_log = None
@@ -45,7 +44,7 @@ class Log(object):

         if os.path.exists(self.log_path):
             os.remove(self.log_path)
-        self.log = open(self.log_path, mode='w')
+        # self.log = open(self.log_path, mode='w')

         self.df_times = pd.DataFrame(columns=['Class', 'Function', comm.Get_rank()])
@@ -65,13 +64,10 @@ class Log(object):
         :rtype: bool
         """
         if message_level <= self.log_level:
-            self.log.write("{0}\n".format(message))
+            with open(self.log_path, mode='a') as log_file:
+                log_file.write("{0}\n".format(message))
+            # the context manager closes the file; no explicit close() is needed

-            if self.log_refresh > 0:
-                self.log_refresh -= 1
-                if self.log_refresh == 0:
-                    self.log.flush()
-                    self.log_refresh = self.refresh_rate[0]
         return True

     def _write_csv_times_log_file(self, rank=0):
@@ -84,20 +80,21 @@ class Log(object):
         :return: True if everything is ok.
         :rtype: bool
         """
+        from functools import reduce
         self.df_times = self.df_times.groupby(['Class', 'Function']).sum().reset_index()
-        data_frames = self.comm.gather(self.df_times, root=0)
-        if self.comm.Get_rank() == rank:
+        data_frames = comm.gather(self.df_times, root=0)
+        if comm.Get_rank() == rank:
             df_merged = reduce(lambda left, right: pd.merge(left, right, on=['Class', 'Function'], how='outer'),
                                data_frames)
             df_merged = df_merged.groupby(['Class', 'Function']).sum()
-            df_merged['min'] = df_merged.loc[:, range(self.comm.Get_size())].min(axis=1)
-            df_merged['max'] = df_merged.loc[:, range(self.comm.Get_size())].max(axis=1)
-            df_merged['mean'] = df_merged.loc[:, range(self.comm.Get_size())].mean(axis=1)
+            df_merged['min'] = df_merged.loc[:, range(comm.Get_size())].min(axis=1)
+            df_merged['max'] = df_merged.loc[:, range(comm.Get_size())].max(axis=1)
+            df_merged['mean'] = df_merged.loc[:, range(comm.Get_size())].mean(axis=1)

             df_merged = df_merged.replace(0.0, np.NaN)
             df_merged.to_csv(self.time_log_path)
-        self.comm.Barrier()
+        comm.Barrier()
         return True

     def write_time_log(self, class_name, function_name, spent_time, message_level=1):
@@ -121,7 +118,7 @@ class Log(object):
         """
         if message_level <= self.log_level:
             self.df_times = self.df_times.append(
-                {'Class': class_name, 'Function': function_name, self.comm.Get_rank(): spent_time}, ignore_index=True)
+                {'Class': class_name, 'Function': function_name, comm.Get_rank(): spent_time}, ignore_index=True)
         # if self.time_log_refresh > 0:
         #     self.time_log_refresh -= 1
         #     if self.time_log_refresh == 0:
@@ -137,5 +134,5 @@ class Log(object):
         :return:
         """
         self._write_csv_times_log_file()
-        self.log.flush()
-        self.log.close()
+        # self.log.flush()
+        # self.log.close()
diff --git a/hermesv3_bu/sectors/agricultural_crop_fertilizers_sector.py b/hermesv3_bu/sectors/agricultural_crop_fertilizers_sector.py
index dcf79751df6bdd887727377c376a510e68d2e0c0..c7eb22996211460c35aea178916b013edaf011c9 100755
--- a/hermesv3_bu/sectors/agricultural_crop_fertilizers_sector.py
+++ b/hermesv3_bu/sectors/agricultural_crop_fertilizers_sector.py
@@ -130,7 +130,8 @@ class AgriculturalCropFertilizersSector(AgriculturalSector):

     def to_dst_resolution(self, src_shapefile, value):
         spent_time = timeit.default_timer()
-        intersection = self.spatial_overlays(src_shapefile.to_crs(self.grid_shp.crs), self.grid_shp.reset_index())
+        intersection = self.spatial_overlays(src_shapefile.to_crs(self.grid_shp.crs).reset_index(),
+                                             self.grid_shp.reset_index())
         intersection['area'] = intersection.geometry.area
         dst_shapefile = self.grid_shp.reset_index().copy()
         dst_shapefile['involved_area'] = intersection.groupby('FID')['area'].sum()
@@ -425,7 +426,7 @@ class AgriculturalCropFertilizersSector(AgriculturalSector):
         spent_time = timeit.default_timer()
         self.logger.write_log('Calculating daily emissions')
         df_by_day = self.get_daily_inputs(emissions)
-        for day, daily_inputs in df_by_day.iteritems():
+        for day, daily_inputs in df_by_day.items():
             df_by_day[day] = self.calculate_nh3_emissions(day, daily_inputs)
         self.logger.write_time_log('AgriculturalCropFertilizersSector', 'calculate_daily_emissions',
                                    timeit.default_timer() - spent_time)
diff --git a/hermesv3_bu/sectors/agricultural_crop_operations_sector.py b/hermesv3_bu/sectors/agricultural_crop_operations_sector.py
index 70c69fc69082532807a78693ae40d1d7982d1635..bb2386098ae80c1d4cf14844a98d1bedfd55ff9d 100755
--- a/hermesv3_bu/sectors/agricultural_crop_operations_sector.py
+++ b/hermesv3_bu/sectors/agricultural_crop_operations_sector.py
@@ -240,7 +240,7 @@ class AgriculturalCropOperationsSector(AgriculturalSector):

         self.logger.write_log('\tCalculating emissions')
         distribution_by_month = {}
-        for month in self.months.iterkeys():
+        for month in self.months.keys():
             distribution_by_month[month] = self.calculate_distribution_by_month(month)

         self.crop_distribution = self.add_dates(distribution_by_month)
diff --git a/hermesv3_bu/sectors/agricultural_machinery_sector.py b/hermesv3_bu/sectors/agricultural_machinery_sector.py
index ffd297769867da91ec08178c7c94175859c315aa..3dcd17bfe5e1d0798f85b7eac6b478d31ac5668e 100755
--- a/hermesv3_bu/sectors/agricultural_machinery_sector.py
+++ b/hermesv3_bu/sectors/agricultural_machinery_sector.py
@@ -314,7 +314,7 @@ class AgriculturalMachinerySector(AgriculturalSector):

         self.logger.write_log('\tCalculating emissions')
         distribution_by_month = {}
-        for month in self.months.iterkeys():
+        for month in self.months.keys():
             distribution_by_month[month] = self.calculate_monthly_emissions_by_nut(month)
             distribution_by_month[month] = self.distribute(distribution_by_month[month])

diff --git a/hermesv3_bu/sectors/agricultural_sector.py b/hermesv3_bu/sectors/agricultural_sector.py
index cb8fbac85ff4f5274a39d343e0dd0b5227a7f5cf..be849801a979e470eb9d4eca5a727f9bda115e42 100755
--- a/hermesv3_bu/sectors/agricultural_sector.py
+++ b/hermesv3_bu/sectors/agricultural_sector.py
@@ -13,6 +13,8 @@ from hermesv3_bu.sectors.sector import Sector
 from hermesv3_bu.io_server.io_shapefile import IoShapefile
 from hermesv3_bu.io_server.io_raster import IoRaster
 from hermesv3_bu.logger.log import Log
+from geopandas import GeoDataFrame
+from pandas import DataFrame


 class AgriculturalSector(Sector):
@@ -162,26 +164,44 @@ class AgriculturalSector(Sector):
         self.logger.write_time_log('AgriculturalSector', 'calculate_num_days', timeit.default_timer() - spent_time)
         return day_dict

-    def get_crop_from_land_uses(self, crop_from_landuse_path):
+    def get_crop_from_land_uses(self, crop_from_land_use_path):
+        """
+        Get the involved land uses and their weight for each crop.
+
+        :param crop_from_land_use_path: Path to the file that contains the crops and their involved land uses with the
+            weights.
+        :type crop_from_land_use_path: str
+
+        :return: Dictionary with the crops as keys and a list as value. That list has as many elements as land uses
+            involved in that crop. Each element of that list is a tuple with the land use as its first element and
+            its weight as the second one.
+        :rtype: dict
+        """
         import re
         spent_time = timeit.default_timer()
-        crop_from_landuse = pd.read_csv(crop_from_landuse_path, sep=';')
+        crop_from_landuse = pd.read_csv(crop_from_land_use_path, sep=';')
         crop_dict = {}
         for i, element in crop_from_landuse.iterrows():
             # if element.crop in self.crop_list:
-            land_uses = list(map(str, re.split(' , |, | ,|,| ', element.land_use)))
-            weights = list(map(str, re.split(' , |, | ,|,| ', element.weight)))
-            crop_dict[element.crop] = zip(land_uses, weights)
-        self.logger.write_time_log('AgriculturalSector', 'get_crop_from_land_uses', timeit.default_timer() - spent_time)
+            land_uses = list(map(int, re.split(' , |, | ,|,| ', element.land_use)))
+            weights = list(map(float, re.split(' , |, | ,|,| ', element.weight)))
+            crop_dict[element.crop] = list(zip(land_uses, weights))
+        self.logger.write_time_log('AgriculturalSector', 'get_crop_from_land_uses', timeit.default_timer() - spent_time)

         return crop_dict

     def get_involved_land_uses(self):
+        """
+        Generate the list of involved land uses.
+
+        :return: List of land uses involved in the selected crops
+        :rtype: list
+        """
         spent_time = timeit.default_timer()

         land_uses_list = []
-        for land_use_and_weight_list in self.crop_from_landuse.itervalues():
+        for land_use_and_weight_list in self.crop_from_landuse.values():
             for land_use_and_weight in land_use_and_weight_list:
                 land_use = int(land_use_and_weight[0])
                 if land_use not in land_uses_list:
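Editor's note: a small illustration of the mapping returned by `get_crop_from_land_uses` after this change; the crop names, land-use codes and weights below are made up, and the CSV layout is inferred from the parsing code above:

```python
# Each crop maps to [(land_use_code, weight), ...]; the codes are now int and
# the weights float, instead of the strings the Python 2 version produced:
crop_dict = {
    'wheat': [(12, 1.0)],              # hypothetical single land use
    'barley': [(12, 0.7), (13, 0.3)],  # hypothetical split across two land uses
}

for crop, land_use_weight_list in crop_dict.items():
    for land_use, weight in land_use_weight_list:
        print(crop, land_use, weight)
```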
@@ -190,22 +210,16 @@ class AgriculturalSector(Sector):

         return land_uses_list

-    def get_land_use_src_by_nut_old(self, land_uses):
-        spent_time = timeit.default_timer()
-
-        df_land_use_with_nut = gpd.read_file(self.land_uses_path)
-
-        df_land_use_with_nut.rename(columns={'CODE': 'NUT', 'gridcode': 'land_use'}, inplace=True)
-
-        df_land_use_with_nut = df_land_use_with_nut.loc[df_land_use_with_nut['land_use'].isin(land_uses), :]
-
-        df_land_use_with_nut = self.spatial_overlays(df_land_use_with_nut,
-                                                     self.clip.shapefile.to_crs(df_land_use_with_nut.crs))
+    def get_land_use_src_by_nut(self, land_uses):
+        """
+        Create a shapefile with the involved source cells from the input raster and only for the given land uses.

-        self.logger.write_time_log('AgriculturalSector', 'get_land_use_src_by_nut', timeit.default_timer() - spent_time)
-        return df_land_use_with_nut
+        :param land_uses: List of land uses to use.
+        :type land_uses: list

-    def get_land_use_src_by_nut(self, land_uses):
+        :return: Shapefile with the land use and NUT code of each source cell. Index: CELL_ID
+        :rtype: GeoDataFrame
+        """
         spent_time = timeit.default_timer()
         land_use_src_by_nut_path = os.path.join(self.auxiliary_dir, 'agriculture', 'land_uses', 'land_uses_src.shp')
         if not os.path.exists(land_use_src_by_nut_path):
@@ -214,31 +228,46 @@ class AgriculturalSector(Sector):
                 os.path.join(self.auxiliary_dir, 'agriculture', 'land_uses', 'land_uses_clip.tif'), values=land_uses)
             land_uses_shp = IoRaster(self.comm_agr).to_shapefile_serie(land_uses_clipped)
+
             ccaa_shp = IoShapefile(self.comm_agr).read_shapefile_serial(self.nut_shapefile).to_crs(land_uses_shp.crs)
             ccaa_shp.drop(columns=['NAME', 'ORDER06'], inplace=True)
             ccaa_shp.rename(columns={'CODE': 'NUT'}, inplace=True)
-            land_use_src_by_nut = self.spatial_overlays(land_uses_shp, ccaa_shp, how='intersection')
+            # ccaa_shp.set_index('NUT', inplace=True)
+            land_use_src_by_nut = self.spatial_overlays(land_uses_shp.reset_index(), ccaa_shp, how='intersection')
             land_use_src_by_nut.drop(columns=['idx1', 'idx2', 'CELL_ID'], inplace=True)
             land_use_src_by_nut.rename(columns={'data': 'land_use'}, inplace=True)
             land_use_src_by_nut['land_use'] = land_use_src_by_nut['land_use'].astype(np.int16)
             land_use_src_by_nut.reset_index(inplace=True, drop=True)
+
             IoShapefile(self.comm_agr).write_shapefile_serial(land_use_src_by_nut, land_use_src_by_nut_path)
         else:
             land_use_src_by_nut = IoShapefile(self.comm_agr).read_shapefile_serial(land_use_src_by_nut_path)
-
+        # land_use_src_by_nut.index.name = 'CELL_ID'
         self.logger.write_time_log('AgriculturalSector', 'get_land_use_src_by_nut', timeit.default_timer() - spent_time)
         return land_use_src_by_nut

     def get_tot_land_use_by_nut(self, land_uses):
+        """
+        Get the total amount of land use area by NUT of the involved land uses.
+
+        :param land_uses: Involved land uses.
+        :type land_uses: list
+
+        :return: Total amount of land use area by NUT.
+        :rtype: DataFrame
+        """
         spent_time = timeit.default_timer()
+
         df = pd.read_csv(self.land_use_by_nut)
         df = df.loc[df['land_use'].isin(land_uses), :]
-        self.logger.write_time_log('AgriculturalSector', 'get_tot_land_use_by_nut', timeit.default_timer() - spent_time)
+        df.set_index(['NUT', 'land_use'], inplace=True)

+        self.logger.write_time_log('AgriculturalSector', 'get_tot_land_use_by_nut', timeit.default_timer() - spent_time)
         return df

     def get_land_use_by_nut_csv(self, land_use_distribution_src_nut, land_uses):
         """
+        Get the area of each involved land use by NUT.

         :param land_use_distribution_src_nut: Shapefile with the polygons of all the land uses for each NUT.
         :type land_use_distribution_src_nut: GeoDataFrame
@@ -246,7 +275,8 @@ class AgriculturalSector(Sector):
         :param land_uses: Land uses to take into account.
         :type land_uses: list

-        :return:
+        :return: DataFrame with the total amount of land use area by involved NUT.
+        :rtype: DataFrame
         """
         spent_time = timeit.default_timer()

@@ -254,6 +284,8 @@ class AgriculturalSector(Sector):
         land_use_by_nut = land_use_distribution_src_nut.groupby(['NUT', 'land_use']).sum().reset_index()
         land_use_by_nut = land_use_by_nut.loc[land_use_by_nut['land_use'].isin(land_uses), :]

+        land_use_by_nut.set_index(['NUT', 'land_use'], inplace=True)
+
         self.logger.write_time_log('AgriculturalSector', 'get_land_use_by_nut_csv', timeit.default_timer() - spent_time)
         return land_use_by_nut

@@ -271,27 +303,20 @@ class AgriculturalSector(Sector):
         :rtype: DataFrame
         """
         spent_time = timeit.default_timer()
+
         if nuts is not None:
-            land_use_by_nut = land_use_by_nut.loc[land_use_by_nut['NUT'].isin(nuts), :]
-        new_dict = pd.DataFrame()
-        for nut in np.unique(land_use_by_nut['NUT']):
-            aux_dict = {'NUT': [nut]}
-
-            for crop, landuse_weight_list in self.crop_from_landuse.iteritems():
-                aux = 0
-                for landuse, weight in landuse_weight_list:
-                    try:
-                        aux += land_use_by_nut.loc[(land_use_by_nut['land_use'] == int(landuse)) &
-                                                   (land_use_by_nut['NUT'] == nut), 'area'].values[0] * float(weight)
-                    except IndexError:
-                        # TODO understand better that error
-                        pass
-                aux_dict[crop] = [aux]
-            new_dict = new_dict.append(pd.DataFrame.from_dict(aux_dict), ignore_index=True)
-        new_dict.set_index('NUT', inplace=True)
+            land_use_by_nut = land_use_by_nut.loc[nuts, :]
+        new_df = pd.DataFrame(index=np.unique(land_use_by_nut.index.get_level_values('NUT')),
+                              columns=self.crop_from_landuse.keys())
+        new_df.fillna(0, inplace=True)
+
+        for crop, land_use_weight_list in self.crop_from_landuse.items():
+            for land_use, weight in land_use_weight_list:
+                new_df[crop] += land_use_by_nut.xs(land_use, level='land_use')['area'] * weight

         self.logger.write_time_log('AgriculturalSector', 'land_use_to_crop_by_nut', timeit.default_timer() - spent_time)
-        return new_dict
+
+        return new_df

     def get_crop_shape_by_nut(self, crop_by_nut, tot_crop_by_nut):
         """
@@ -355,7 +380,7 @@ class AgriculturalSector(Sector):
         spent_time = timeit.default_timer()

         crop_distribution_src = land_use_distribution_src_nut.loc[:, ['NUT', 'geometry']]
-        for crop, landuse_weight_list in self.crop_from_landuse.iteritems():
+        for crop, landuse_weight_list in self.crop_from_landuse.items():
             crop_distribution_src[crop] = 0
             for landuse, weight in landuse_weight_list:
                 crop_distribution_src.loc[land_use_distribution_src_nut['land_use'] == int(landuse), crop] += \
@@ -390,7 +415,9 @@ class AgriculturalSector(Sector):

         crop_distribution = crop_distribution.to_crs(self.grid_shp.crs)
         crop_distribution['src_inter_fraction'] = crop_distribution.geometry.area
-        crop_distribution = self.spatial_overlays(crop_distribution, self.grid_shp, how='intersection')
+
+        crop_distribution = self.spatial_overlays(crop_distribution.reset_index(), self.grid_shp.reset_index(),
+                                                  how='intersection')
         crop_distribution['src_inter_fraction'] = \
             crop_distribution.geometry.area / crop_distribution['src_inter_fraction']

@@ -426,17 +453,16 @@ class AgriculturalSector(Sector):

         if self.comm_agr.Get_rank() == 0:
             self.logger.write_log('Creating the crop distribution shapefile on the grid resolution.',
                                   message_level=2)
+
             involved_land_uses = self.get_involved_land_uses()
             land_use_distribution_src_nut = self.get_land_use_src_by_nut(involved_land_uses)
-
             land_use_by_nut = self.get_land_use_by_nut_csv(land_use_distribution_src_nut, involved_land_uses)
             tot_land_use_by_nut = self.get_tot_land_use_by_nut(involved_land_uses)

             crop_by_nut = self.land_use_to_crop_by_nut(land_use_by_nut)
             tot_crop_by_nut = self.land_use_to_crop_by_nut(
-                tot_land_use_by_nut, nuts=list(np.unique(land_use_by_nut['NUT'])))
-
+                tot_land_use_by_nut, nuts=list(np.unique(land_use_by_nut.index.get_level_values('NUT'))))
             crop_shape_by_nut = self.get_crop_shape_by_nut(crop_by_nut, tot_crop_by_nut)
             crop_area_by_nut = self.get_crop_area_by_nut(crop_shape_by_nut)

@@ -444,8 +470,8 @@ class AgriculturalSector(Sector):
                                                                      crop_area_by_nut, land_use_distribution_src_nut)
             crop_distribution_dst = self.get_crop_distribution_in_dst_cells(crop_distribution_src)
-
             crop_distribution_dst = self.add_timezone(crop_distribution_dst)
+
             IoShapefile(self.comm).write_shapefile_serial(crop_distribution_dst, file_path)
         else:
             self.logger.write_log('Waiting for the master process that creates the crop distribution shapefile.',
@@ -482,7 +508,7 @@ class AgriculturalSector(Sector):
         """
         rank_list = []

-        for sector, sector_procs in sector_dict.iteritems():
+        for sector, sector_procs in sector_dict.items():
             if sector in ['crop_operations', 'crop_fertilizers', 'agricultural_machinery']:
                 rank_list += sector_procs
         rank_list = sorted(rank_list)
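Editor's note: several sectors touched by this patch (the crop distribution above, livestock and residential below) share the same area-weighting idiom: intersect the source polygons with the destination grid and scale each value by the intersected fraction of the source area. A compact sketch of the idea, using geopandas' own `overlay` instead of the project's `spatial_overlays` helper; the GeoDataFrames `src` (with a `value` column) and `grid` (indexed by `FID`) are hypothetical:

```python
import geopandas as gpd


def distribute_to_grid(src, grid):
    # Remember each source polygon's full area...
    src = src.to_crs(grid.crs)
    src['src_area'] = src.geometry.area
    # ...intersect with the destination grid cells...
    inter = gpd.overlay(src, grid.reset_index(), how='intersection')
    # ...and weight the value by the intersected fraction of the source area.
    inter['value'] = inter['value'] * inter.geometry.area / inter['src_area']
    return inter.groupby('FID')['value'].sum()
```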
diff --git a/hermesv3_bu/sectors/aviation_sector.py b/hermesv3_bu/sectors/aviation_sector.py
index a9718fd42da441ca2343a66888fdd8fbacee342c..a2802196172121458d5416fe1fdea9e2eb19634a 100755
--- a/hermesv3_bu/sectors/aviation_sector.py
+++ b/hermesv3_bu/sectors/aviation_sector.py
@@ -228,7 +228,7 @@ class AviationSector(Sector):
         spent_time = timeit.default_timer()
         if self.comm.Get_rank() == 0:
             runway_shapefile = gpd.read_file(airport_runways_shapefile_path)
-            runway_shapefile.set_index('airport_id', inplace=True)
+            runway_shapefile.set_index(['airport_id', 'runway_id'], inplace=True)
             runway_shapefile = runway_shapefile.loc[self.airport_list_full, :]
             runway_shapefile = runway_shapefile.loc[runway_shapefile['cons'] == 1,
                                                     ['approach_f', 'climbout_f', 'geometry']]
@@ -286,13 +286,9 @@ class AviationSector(Sector):
         :rtype: DataFrame
         """
         spent_time = timeit.default_timer()
-        check = False
+
         operations = pd.read_csv(operations_csv_path)
-        if check:
-            for index, aux_operations in operations.groupby(['airport_id', 'plane_id', 'operation']):
-                if len(aux_operations) > 1:
-                    print index, len(aux_operations)
+
         if self.plane_list is None:
             self.plane_list = list(np.unique(operations['plane_id'].values))
         else:
@@ -320,13 +316,10 @@ class AviationSector(Sector):
         :rtype: DataFrame
         """
         spent_time = timeit.default_timer()
-        check = False
+
         dataframe = pd.read_csv(planes_path)
         dataframe = dataframe.loc[dataframe['plane_id'].isin(self.plane_list)]
-        if check:
-            for index, aux_operations in dataframe.groupby('plane_id'):
-                if len(aux_operations) > 1:
-                    print index, len(aux_operations)
+
         dataframe.set_index('plane_id', inplace=True)
         self.logger.write_time_log('AviationSector', 'read_planes', timeit.default_timer() - spent_time)

@@ -477,6 +470,7 @@ class AviationSector(Sector):
         def normalize(df):
             total_fraction = df['{0}_f'.format(phase_type)].values.sum()
             df['{0}_f'.format(phase_type)] = df['{0}_f'.format(phase_type)] / total_fraction
+
             return df.loc[:, ['{0}_f'.format(phase_type)]]

         self.logger.write_log('\t\tCalculating runway distribution for {0}'.format(phase_type), message_level=2)
@@ -487,13 +481,11 @@ class AviationSector(Sector):
         if not os.path.exists(runway_distribution_path):
             if self.comm.rank == 0:
                 runway_shapefile['{0}_f'.format(phase_type)] = runway_shapefile.groupby('airport_id').apply(normalize)
-                if not os.path.exists(os.path.dirname(runway_distribution_path)):
-                    os.makedirs(os.path.dirname(runway_distribution_path))
-
                 runway_shapefile.reset_index(inplace=True)
+                runway_shapefile.to_crs(self.grid_shp.crs, inplace=True)
                 runway_shapefile['length'] = runway_shapefile.length
                 # duplicating each runway by involved cell
-                runway_shapefile = gpd.sjoin(runway_shapefile, self.grid_shp.reset_index(), how="inner",
+                runway_shapefile = gpd.sjoin(runway_shapefile.reset_index(), self.grid_shp, how="inner",
                                              op='intersects')
                 # Adding cell geometry
                 runway_shapefile = runway_shapefile.merge(self.grid_shp.reset_index().loc[:, ['FID', 'geometry']],
@@ -511,6 +503,8 @@ class AviationSector(Sector):
                 runway_shapefile = runway_shapefile[['airport_id', 'FID', 'layer', 'fraction']]
                 runway_shapefile = runway_shapefile.groupby(['airport_id', 'FID', 'layer']).sum()
                 # runway_shapefile.set_index(['airport_id', 'FID', 'layer'], inplace=True)
+                if not os.path.exists(os.path.dirname(runway_distribution_path)):
+                    os.makedirs(os.path.dirname(runway_distribution_path))
                 runway_shapefile.to_csv(runway_distribution_path)
             else:
                 runway_shapefile = None
@@ -1015,7 +1009,7 @@ class AviationSector(Sector):
             self.logger.write_log('\t\tTrajectory emissions distributed (approach, climb out)', message_level=2)

         emissions = pd.concat([airport_emissions, runway_departure_emissions, trajectory_arrival_emissions,
-                               trajectory_departure_emisions, runway_arrival_emissions])
+                               trajectory_departure_emisions, runway_arrival_emissions], sort=False)

         emissions = emissions.groupby(['FID', 'layer', 'tstep']).sum()

         runway_arrival_emissions_wear = runway_arrival_emissions_wear.groupby(['FID', 'layer', 'tstep']).sum()
@@ -1028,7 +1022,7 @@ class AviationSector(Sector):
         runway_arrival_emissions_wear = self.speciate(runway_arrival_emissions_wear, 'landing_wear')

         emissions = self.speciate(emissions, 'default')
-        emissions = pd.concat([emissions, runway_arrival_emissions_wear])
+        emissions = pd.concat([emissions, runway_arrival_emissions_wear], sort=False)
         emissions = emissions[(emissions.T != 0).any()]
         emissions = emissions.groupby(['FID', 'layer', 'tstep']).sum()

diff --git a/hermesv3_bu/sectors/livestock_sector.py b/hermesv3_bu/sectors/livestock_sector.py
index 1d403cfe5f4abfd17a9f4e80d30938240633bb1a..4a4cbfea5b71cba219753a962ed7bd1bdebe3ff8 100755
--- a/hermesv3_bu/sectors/livestock_sector.py
+++ b/hermesv3_bu/sectors/livestock_sector.py
@@ -322,7 +322,8 @@ class LivestockSector(Sector):
         animal_distribution['src_inter_fraction'] = animal_distribution.geometry.area

         # Making the intersection between the src distribution and the destiny grid
-        animal_distribution = self.spatial_overlays(animal_distribution, self.grid_shp, how='intersection')
+        animal_distribution = self.spatial_overlays(animal_distribution.reset_index(), self.grid_shp,
+                                                    how='intersection')
         # Getting proportion of intersection in the src cell (src_area/portion_area)
         animal_distribution['src_inter_fraction'] = \
             animal_distribution.geometry.area / animal_distribution['src_inter_fraction']
@@ -794,7 +795,7 @@ class LivestockSector(Sector):
             out_df.loc[:, out_p] = out_df.loc[:, out_p].multiply(1000. * (1. / self.molecular_weights['pm10']))

         # Preparing PM10 for PMC
-        if 'pmc' in [x.lower() for x in self.speciation_map.iterkeys()]:
+        if 'pmc' in [x.lower() for x in self.speciation_map.keys()]:
             out_df['aux_pm10'] = 0
             for i, animal in pd.read_csv(os.path.join(self.ef_dir, 'pm.csv')).iterrows():
                 # Iterating by animal subtype
@@ -846,7 +847,7 @@ class LivestockSector(Sector):
             out_df.loc[:, out_p] = out_df.loc[:, out_p].multiply(1000. * (1. / self.molecular_weights['pm25']))

         # Preparing PM2.5 for PMC
-        if 'pmc' in [x.lower() for x in self.speciation_map.iterkeys()]:
+        if 'pmc' in [x.lower() for x in self.speciation_map.keys()]:
             out_df['aux_pm25'] = 0
             for i, animal in pd.read_csv(os.path.join(self.ef_dir, 'pm.csv')).iterrows():
                 if animal.Code.startswith(tuple(self.animal_list)):
@@ -890,7 +891,7 @@ class LivestockSector(Sector):
                 (30. / 14.) * 1000. * (1. / self.molecular_weights['nox_no']))

         # ===== PMC =====
-        if 'pmc' in [x.lower() for x in self.speciation_map.iterkeys()]:
+        if 'pmc' in [x.lower() for x in self.speciation_map.keys()]:
             pmc_name = 'PMC'
             self.logger.write_log('\t\t\tCalculating {0} emissions'.format(pmc_name), message_level=3)
             if all(x in [x.lower() for x in self.source_pollutants] for x in ['pm10', 'pm25']):
diff --git a/hermesv3_bu/sectors/point_source_sector.py b/hermesv3_bu/sectors/point_source_sector.py
index d313d068f532ec6b7113f1864c3e3f1b9a40144b..1ad281fe53e1f50e8066200433757e3db6c3461f 100755
--- a/hermesv3_bu/sectors/point_source_sector.py
+++ b/hermesv3_bu/sectors/point_source_sector.py
@@ -277,8 +277,9 @@ class PointSourceSector(Sector):
                                    timeit.default_timer() - spent_time)
         return catalog

-    @staticmethod
-    def get_meteo_xy(dataframe, netcdf_path):
+    def get_meteo_xy(self, dataframe, netcdf_path):
+        spent_time = timeit.default_timer()
+
         def nearest(row, geom_union, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None):
             """Finds the nearest point and return the corresponding value from specified column.
             https://automating-gis-processes.github.io/2017/lessons/L3/nearest-neighbour.html
@@ -304,7 +305,7 @@ class PointSourceSector(Sector):

         nc_dataframe = pd.DataFrame.from_dict({'X': x.flatten(), 'Y': y.flatten()})
         nc_dataframe = gpd.GeoDataFrame(nc_dataframe,
-                                        geometry=[Point(xy) for xy in zip(lons.flatten(), lats.flatten())],
+                                        geometry=[Point(xy) for xy in list(zip(lons.flatten(), lats.flatten()))],
                                         crs={'init': 'epsg:4326'})
         nc_dataframe['index'] = nc_dataframe.index

@@ -315,6 +316,7 @@ class PointSourceSector(Sector):
         dataframe['X'] = nc_dataframe.loc[dataframe['meteo_index'], 'X'].values
         dataframe['Y'] = nc_dataframe.loc[dataframe['meteo_index'], 'Y'].values

+        self.logger.write_time_log('PointSourceSector', 'get_meteo_xy', timeit.default_timer() - spent_time)
         return dataframe[['X', 'Y']]

     def get_plumerise_meteo(self, catalog):
@@ -475,7 +477,7 @@ class PointSourceSector(Sector):
         # TODO Use IoNetCDF
         spent_time = timeit.default_timer()
-        # Adding meteo X, Y array index to the catalog
+
         meteo_xy = self.get_meteo_xy(catalog.groupby('Code').first(), os.path.join(
             self.plume_rise_pahts['temperature_sfc_dir'],
             't2_{0}.nc'.format(self.date_array[0].replace(hour=0).strftime("%Y%m%d%H"))))
@@ -749,6 +751,9 @@ class PointSourceSector(Sector):
         catalog.reset_index(inplace=True)
         catalog = catalog.to_crs(self.grid_shp.crs)

+        # catalog.to_file('~/temp/catalog.shp')  # debug output, disabled
+        # self.grid_shp.reset_index().to_file('~/temp/grid.shp')  # debug output, disabled
+
         catalog = gpd.sjoin(catalog, self.grid_shp.reset_index(), how="inner", op='intersects')

         # Drops duplicates when the point source is on the boundary of the cell
diff --git a/hermesv3_bu/sectors/recreational_boats_sector.py b/hermesv3_bu/sectors/recreational_boats_sector.py
index 59177fcfa3cf83a97ce0f5c2d911fd443383eb79..bbad747c22ff05df09c7c98120a940825df947b2 100755
--- a/hermesv3_bu/sectors/recreational_boats_sector.py
+++ b/hermesv3_bu/sectors/recreational_boats_sector.py
@@ -115,7 +115,7 @@ class RecreationalBoatsSector(Sector):
         new_dataframe = self.density_map.copy()
         new_dataframe.drop(columns='data', inplace=True)

-        for pollutant, annual_value in annual_emissions.iteritems():
+        for pollutant, annual_value in annual_emissions.items():
             new_dataframe[pollutant] = self.density_map['data'] * annual_value

         self.logger.write_time_log('RecreationalBoatsSector', 'calculate_yearly_emissions',
                                    timeit.default_timer() - spent_time)
@@ -160,15 +160,15 @@ class RecreationalBoatsSector(Sector):
         dataframe['date_as_date'] = dataframe['date'].dt.date

         dataframe['MF'] = dataframe.groupby('month').apply(get_mf)
-        dataframe[self.output_pollutants] = dataframe[self.output_pollutants].multiply(dataframe['MF'], axis=0)
+        dataframe[self.output_pollutants] = dataframe[self.output_pollutants].mul(dataframe['MF'], axis=0)
         dataframe.drop(columns=['month', 'MF'], inplace=True)

         dataframe['WF'] = dataframe.groupby('date_as_date').apply(get_wf)
-        dataframe[self.output_pollutants] = dataframe[self.output_pollutants].multiply(dataframe['WF'], axis=0)
+        dataframe[self.output_pollutants] = dataframe[self.output_pollutants].mul(dataframe['WF'], axis=0)
         dataframe.drop(columns=['weekday', 'date', 'date_as_date', 'WF'], inplace=True)

         dataframe['HF'] = dataframe.groupby('hour').apply(get_hf)
-        dataframe[self.output_pollutants] = dataframe[self.output_pollutants].multiply(dataframe['HF'], axis=0)
+        dataframe[self.output_pollutants] = dataframe[self.output_pollutants].mul(dataframe['HF'], axis=0)
         dataframe.drop(columns=['hour', 'HF'], inplace=True)

         self.logger.write_time_log('RecreationalBoatsSector', 'calculate_hourly_emissions',
diff --git a/hermesv3_bu/sectors/residential_sector.py b/hermesv3_bu/sectors/residential_sector.py
index fdd12479b6002f20f3da2d05dcea02d313dcd2fc..22cd1161fc38bfa2c62f097f7acb35c22a016252 100755
--- a/hermesv3_bu/sectors/residential_sector.py
+++ b/hermesv3_bu/sectors/residential_sector.py
@@ -54,7 +54,6 @@ class ResidentialSector(Sector):
         self.heating_degree_day_path = heating_degree_day_path
         self.temperature_path = temperature_path

-
         self.logger.write_time_log('ResidentialSector', '__init__', timeit.default_timer() - spent_time)

     def read_ef_file(self, path):
@@ -126,10 +125,12 @@ class ResidentialSector(Sector):
         spent_time = timeit.default_timer()

         src_distribution.to_crs(self.grid_shp.crs, inplace=True)
-        src_distribution.to_file(os.path.join(self.auxiliary_dir, 'residential', 'fuel_distribution_src.shp'))
+        # src_distribution.reset_index().to_file(
+        #     os.path.join(self.auxiliary_dir, 'residential', 'fuel_distribution_src.shp'))
         src_distribution['src_inter_fraction'] = src_distribution.geometry.area
         src_distribution = self.spatial_overlays(src_distribution, self.grid_shp.reset_index(), how='intersection')
-        src_distribution.to_file(os.path.join(self.auxiliary_dir, 'residential', 'fuel_distribution_raw.shp'))
+        # src_distribution.reset_index().to_file(
+        #     os.path.join(self.auxiliary_dir, 'residential', 'fuel_distribution_raw.shp'))
         src_distribution['src_inter_fraction'] = src_distribution.geometry.area / src_distribution[
             'src_inter_fraction']

@@ -155,22 +156,25 @@ class ResidentialSector(Sector):
             population_density = IoRaster(self.comm).clip_raster_with_shapefile_poly(
                 population_density_map, self.clip.shapefile,
                 os.path.join(self.auxiliary_dir, 'residential', 'population_density.tif'))
-            population_density = IoRaster(self.comm).to_shapefile_serie(population_density)
+            population_density = IoRaster(self.comm).to_shapefile_serie_by_cell(population_density)
             population_density.rename(columns={'data': 'pop'}, inplace=True)

             population_type = IoRaster(self.comm).clip_raster_with_shapefile_poly(
                 population_type_map, self.clip.shapefile,
                 os.path.join(self.auxiliary_dir, 'residential', 'population_type.tif'))
-            population_type = IoRaster(self.comm).to_shapefile_serie(population_type)
+            population_type = IoRaster(self.comm).to_shapefile_serie_by_cell(population_type)
             population_type.rename(columns={'data': 'type'}, inplace=True)
+            population_type['type'] = population_type['type'].astype(np.int16)
+            population_type.loc[population_type['type'] == 2, 'type'] = 3
+
             population_density['type'] = population_type['type']
-            population_density.loc[population_density['type'] == 2, 'type'] = 3
+            # population_density = gpd.sjoin(population_density, population_type, how='left', op='intersects')
+            # population_density.drop(columns=['index_right'], inplace=True)

             population_density = self.add_nut_code(population_density, prov_shapefile, nut_value='ORDER07')
             population_density.rename(columns={'nut_code': 'prov'}, inplace=True)
-
             population_density = population_density.loc[population_density['prov'] != -999, :]

             population_density = self.add_nut_code(population_density, ccaa_shapefile, nut_value='ORDER06')
             population_density.rename(columns={'nut_code': 'ccaa'}, inplace=True)
@@ -185,7 +189,8 @@ class ResidentialSector(Sector):
             self.pop_type_by_ccaa = pd.read_csv(self.pop_type_by_ccaa).set_index(['ccaa', 'type'])
             self.pop_type_by_prov = pd.read_csv(self.pop_type_by_prov).set_index(['prov', 'type'])

-            fuel_distribution = population_density.loc[:, ['CELL_ID', 'geometry']].copy()
+            fuel_distribution = population_density[['geometry']].copy()
+            fuel_distribution.index.name = 'CELL_ID'

             for fuel in self.fuel_list:
                 fuel_distribution[fuel] = 0
@@ -238,9 +243,13 @@ class ResidentialSector(Sector):
                                       fuel] = population_density['pop'].multiply(
                                 energy_consumption / total_pop)

             fuel_distribution = self.to_dst_resolution(fuel_distribution)
-            IoShapefile(self.comm).write_shapefile_serial(fuel_distribution, fuel_distribution_path)
+
+            IoShapefile(self.comm).write_shapefile_serial(fuel_distribution.reset_index(), fuel_distribution_path)
         else:
             fuel_distribution = IoShapefile(self.comm).read_shapefile_serial(fuel_distribution_path)
+            fuel_distribution.drop(columns=['index'], inplace=True)
+
+        fuel_distribution.set_index('FID', inplace=True)

         self.logger.write_time_log('ResidentialSector', 'get_fuel_distribution', timeit.default_timer() - spent_time)
         return fuel_distribution
diff --git a/hermesv3_bu/sectors/sector.py b/hermesv3_bu/sectors/sector.py
index b2dc6ab862f3f20bf8fc5899c63ed51221a5772b..edcf9555bcf50416df96d792e2edadc6e855469f 100755
--- a/hermesv3_bu/sectors/sector.py
+++ b/hermesv3_bu/sectors/sector.py
@@ -7,6 +7,7 @@ from hermesv3_bu.logger.log import Log
 import numpy as np
 import pandas as pd
 import geopandas as gpd
+from geopandas import GeoDataFrame
 from mpi4py import MPI


@@ -90,7 +91,7 @@ class Sector(object):
         self.speciation_profile = self.read_speciation_profiles(speciation_profiles_path)
         self.molecular_weights = self.read_molecular_weights(molecular_weights_path)

-        self.output_pollutants = self.speciation_map.keys()
+        self.output_pollutants = list(self.speciation_map.keys())

         self.logger.write_time_log('Sector', '__init__', timeit.default_timer() - spent_time)

@@ -320,11 +321,11 @@ class Sector(object):
         spent_time = timeit.default_timer()
         weekdays_factors = 0
         num_days = 0
-        for day in xrange(7):
+        for day in range(7):
             weekdays_factors += profile[day] * weekdays[day]
             num_days += weekdays[day]
         increment = float(num_days - weekdays_factors) / num_days
-        for day in xrange(7):
+        for day in range(7):
             profile[day] = (increment + profile[day]) / num_days
(increment + profile[day]) / num_days self.logger.write_time_log('Sector', 'calculate_weekday_factor_full_month', timeit.default_timer() - spent_time) @@ -343,7 +344,7 @@ class Sector(object): from calendar import monthrange, weekday, MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY spent_time = timeit.default_timer() weekdays = [MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY] - days = [weekday(date.year, date.month, d + 1) for d in xrange(monthrange(date.year, date.month)[1])] + days = [weekday(date.year, date.month, d + 1) for d in range(monthrange(date.year, date.month)[1])] weekdays_dict = {} for i, day in enumerate(weekdays): @@ -465,6 +466,8 @@ class Sector(object): :param how: Operation to do :return: GeoDataFrame """ + from functools import reduce + spent_time = timeit.default_timer() df1 = df1.copy() df2 = df2.copy() @@ -556,6 +559,6 @@ class Sector(object): def get_output_pollutants(self, input_pollutant): spent_time = timeit.default_timer() - return_value = [outs for outs, ints in self.speciation_map.iteritems() if ints == input_pollutant] + return_value = [outs for outs, ints in self.speciation_map.items() if ints == input_pollutant] self.logger.write_time_log('Sector', 'get_output_pollutants', timeit.default_timer() - spent_time) return return_value diff --git a/hermesv3_bu/sectors/sector_manager.py b/hermesv3_bu/sectors/sector_manager.py index f36f32c1ed19a93eae922540e67489579c60166d..51b355d9b305667d43ad56091414a8075daf8b92 100755 --- a/hermesv3_bu/sectors/sector_manager.py +++ b/hermesv3_bu/sectors/sector_manager.py @@ -28,12 +28,12 @@ class SectorManager(object): self.logger = logger self.sector_list = self.make_sector_list(arguments, comm_world.Get_size()) self.logger.write_log('Sector process distribution:') - for sect, procs in self.sector_list.iteritems(): + for sect, procs in self.sector_list.items(): self.logger.write_log('\t{0}: {1}'.format(sect, procs)) color = 10 agg_color = 99 - for sector, sector_procs in self.sector_list.iteritems(): + for sector, sector_procs in self.sector_list.items(): if sector == 'aviation' and comm_world.Get_rank() in sector_procs: from hermesv3_bu.sectors.aviation_sector import AviationSector self.sector = AviationSector( @@ -221,13 +221,13 @@ class SectorManager(object): for sector in SECTOR_LIST: if vars(arguments)['do_{0}'.format(sector)]: n_procs = vars(arguments)['{0}_processors'.format(sector)] - sector_dict[sector] = [accum + x for x in xrange(n_procs)] + sector_dict[sector] = [accum + x for x in range(n_procs)] accum += n_procs if accum != max_procs: raise ValueError("The selected number of processors '{0}' does not fit ".format(max_procs) + "with the sum of processors dedicated for all the sectors " + "'{0}': {1}".format(accum, {sector: len(sector_procs) - for sector, sector_procs in sector_dict.iteritems()})) + for sector, sector_procs in sector_dict.items()})) self.logger.write_time_log('SectorManager', 'make_sector_list', timeit.default_timer() - spent_time) return sector_dict diff --git a/hermesv3_bu/sectors/shipping_port_sector.py b/hermesv3_bu/sectors/shipping_port_sector.py index bb6823bd1373cee9970276815873482073f4e4b3..316959cecb122bc6cfbf40faabdb0347bdc4fcc1 100755 --- a/hermesv3_bu/sectors/shipping_port_sector.py +++ b/hermesv3_bu/sectors/shipping_port_sector.py @@ -110,7 +110,6 @@ class ShippingPortSector(Sector): port_shp = gpd.sjoin(port_shp, self.clip.shapefile.to_crs(port_shp.crs), how='inner', op='intersects') port_list = np.unique(port_shp['code'].values) - print port_list if 
len(port_list) < self.comm.Get_size(): raise ValueError("The chosen number of processors {0} exceeds the number of involved ports {1}.".format( self.comm.Get_size(), len(port_list)) + " Set {0} at shipping_port_processors value.".format( @@ -609,8 +608,7 @@ class ShippingPortSector(Sector): self.logger.write_log('\t\tCalculating yearly emissions', message_level=2) manoeuvring, hoteling = self.calculate_yearly_emissions_by_port_vessel() - # print manoeuvring.reset_index().groupby('code').sum() - # print hoteling.reset_index().groupby('code').sum() + manoeuvring = self.add_timezone(manoeuvring, self.maneuvering_shapefile_path) hoteling = self.add_timezone(hoteling, self.hoteling_shapefile_path) diff --git a/hermesv3_bu/sectors/traffic_area_sector.py b/hermesv3_bu/sectors/traffic_area_sector.py index 023f2405c502ccb6ca0b0ea0c6f7f206d77e2fb3..5a744581b27663a358aba22bf5f49147f54f1243 100755 --- a/hermesv3_bu/sectors/traffic_area_sector.py +++ b/hermesv3_bu/sectors/traffic_area_sector.py @@ -207,11 +207,11 @@ class TrafficAreaSector(Sector): [0.025, 0.025, 0.025, 0.025, 0.025, 0.027083, 0.03125, 0.0375, 0.045833, 0.05625, 0.060417, 0.066667, 0.06875, 0.072917, 0.070833, 0.064583, 0.05625, 0.045833, 0.0375, 0.03125, 0.027083, 0.025, 0.025, 0.025]) - for x in xrange(24): + for x in range(24): temperature['t_{0}'.format(x)] = default_profile[x] else: - temp_list = ['t_{0}'.format(x) for x in xrange(24)] + temp_list = ['t_{0}'.format(x) for x in range(24)] temperature.loc[:, temp_list] = temperature[temp_list] + 273.15 temperature.loc[:, temp_list] = temperature[temp_list].subtract(temperature[temp_list].min(axis=1), axis=0) @@ -250,13 +250,13 @@ class TrafficAreaSector(Sector): temperature = IoNetcdf(self.comm).get_hourly_data_from_netcdf( self.evaporative['c_lon'].min(), self.evaporative['c_lon'].max(), self.evaporative['c_lat'].min(), self.evaporative['c_lat'].max(), self.temperature_dir, 'tas', self.date_array) - temperature.rename(columns={x: 't_{0}'.format(x) for x in xrange(len(self.date_array))}, inplace=True) + temperature.rename(columns={x: 't_{0}'.format(x) for x in range(len(self.date_array))}, inplace=True) # From Kelvin to Celsius degrees - temperature.loc[:, ['t_{0}'.format(x) for x in xrange(len(self.date_array))]] = \ - temperature.loc[:, ['t_{0}'.format(x) for x in xrange(len(self.date_array))]] - 273.15 + temperature.loc[:, ['t_{0}'.format(x) for x in range(len(self.date_array))]] = \ + temperature.loc[:, ['t_{0}'.format(x) for x in range(len(self.date_array))]] - 273.15 temperature_mean = gpd.GeoDataFrame(temperature[['t_{0}'.format(x) for x in - xrange(len(self.date_array))]].mean(axis=1), + range(len(self.date_array))]].mean(axis=1), columns=['temp'], geometry=temperature.geometry) temperature_mean['REC'] = temperature['REC'] @@ -324,7 +324,7 @@ class TrafficAreaSector(Sector): spent_time = timeit.default_timer() speciated_df = self.evaporative.drop(columns=['nmvoc']) - out_p_list = [out_p for out_p, in_p_aux in self.speciation_map.iteritems() if in_p_aux == 'nmvoc'] + out_p_list = [out_p for out_p, in_p_aux in self.speciation_map.items() if in_p_aux == 'nmvoc'] for p in out_p_list: # From g/day to mol/day @@ -337,12 +337,9 @@ class TrafficAreaSector(Sector): spent_time = timeit.default_timer() df = df.loc[:, ['data', 'FID']].groupby('FID').sum() - # print pop_nut_cell ef_df = pd.read_csv(self.small_cities_ef_file, sep=',') - # print ef_df ef_df.drop(['Code', 'Copert_V_name'], axis=1, inplace=True) for pollutant in ef_df.columns.values: - # print ef_df[pollutant].iloc[0] 
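+ # ef_df holds a single row of emission factors; each pollutant's EF scales the per-cell activity value in 'data'.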
df[pollutant] = df['data'] * ef_df[pollutant].iloc[0] df.drop('data', axis=1, inplace=True) @@ -364,7 +361,7 @@ class TrafficAreaSector(Sector): inc = 1 while len(grid.loc[grid['timezone'] == '', :]) > 0: - print len(grid.loc[grid['timezone'] == '', :]) + # print(len(grid.loc[grid['timezone'] == '', :])) grid.loc[grid['timezone'] == '', 'timezone'] = aux_grid.loc[grid['timezone'] == '', :].apply( lambda x: tz.closest_timezone_at(lng=x['lons'], lat=x['lats'], delta_degree=inc), axis=1) inc += 1 @@ -389,10 +386,9 @@ class TrafficAreaSector(Sector): small_cities['date'] = small_cities.groupby('timezone')['utc'].apply( lambda x: pd.to_datetime(x).dt.tz_localize(pytz.utc).dt.tz_convert(x.name).dt.tz_localize(None)) small_cities.drop(['utc', 'timezone'], inplace=True, axis=1) - # print small_cities df_list = [] - for tstep in xrange(len(self.date_array)): + for tstep in range(len(self.date_array)): small_cities['month'] = small_cities['date'].dt.month small_cities['weekday'] = small_cities['date'].dt.dayofweek small_cities['hour'] = small_cities['date'].dt.hour diff --git a/hermesv3_bu/sectors/traffic_sector.py b/hermesv3_bu/sectors/traffic_sector.py index 2df4f77c2a3908fd08bb51c76906d9079f0df87f..a31d3c9c2827c1be9f91403eb27ca7a7dfb6a9e5 100755 --- a/hermesv3_bu/sectors/traffic_sector.py +++ b/hermesv3_bu/sectors/traffic_sector.py @@ -4,7 +4,9 @@ import os import timeit import pandas as pd +from pandas import DataFrame import geopandas as gpd +from geopandas import GeoDataFrame import numpy as np from datetime import timedelta import warnings @@ -69,13 +71,11 @@ class TrafficSector(Sector): self.crs = None # crs is the projection of the road links and it is set on the read_road_links function. self.write_rline = write_rline self.road_links = self.read_road_links(road_link_path) + self.load = load self.ef_common_path = ef_common_path self.temp_common_path = temp_common_path - # TODO use only date_array - self.timestep_num = len(self.date_array) - self.timestep_freq = 1 - self.starting_date = self.date_array[0] + self.add_local_date(self.date_array[0]) self.hot_cold_speciation = hot_cold_speciation @@ -87,14 +87,10 @@ class TrafficSector(Sector): self.fleet_compo = self.read_fleet_compo(fleet_compo_path, vehicle_list) self.speed_hourly = self.read_speed_hourly(speed_hourly_path) - self.hourly_profiles = pd.concat([ - pd.read_csv(hourly_mean_profiles_path), - pd.read_csv(hourly_weekday_profiles_path), - pd.read_csv(hourly_saturday_profiles_path), - pd.read_csv(hourly_sunday_profiles_path) - ]).reset_index() + self.hourly_profiles = self.read_all_hourly_profiles(hourly_mean_profiles_path, hourly_weekday_profiles_path, + hourly_saturday_profiles_path, hourly_sunday_profiles_path) - self.expanded = self.expand_road_links('hourly', len(self.date_array), 1) + self.expanded = self.expand_road_links() del self.fleet_compo, self.speed_hourly, self.monthly_profiles, self.weekly_profiles, self.hourly_profiles @@ -107,6 +103,16 @@ class TrafficSector(Sector): self.logger.write_time_log('TrafficSector', '__init__', timeit.default_timer() - spent_time) + def read_all_hourly_profiles(self, hourly_mean_profiles_path, hourly_weekday_profiles_path, + hourly_saturday_profiles_path, hourly_sunday_profiles_path): + hourly_profiles = pd.concat([self.read_hourly_profiles(hourly_mean_profiles_path), + self.read_hourly_profiles(hourly_weekday_profiles_path), + self.read_hourly_profiles(hourly_saturday_profiles_path), + self.read_hourly_profiles(hourly_sunday_profiles_path)]) + hourly_profiles.index = 
hourly_profiles.index.astype(str) + return hourly_profiles + + def read_speciation_map(self, path): """ Read the speciation map. @@ -171,7 +177,8 @@ class TrafficSector(Sector): self.road_links['start_date'] = self.road_links.groupby('timezone')['utc'].apply( lambda x: pd.to_datetime(x).dt.tz_localize(pytz.utc).dt.tz_convert(x.name).dt.tz_localize(None)) - del self.road_links['utc'], self.road_links['timezone'] + self.road_links.drop(columns=['utc', 'timezone'], inplace=True) + libc.malloc_trim(0) self.logger.write_time_log('TrafficSector', 'add_local_date', timeit.default_timer() - spent_time) return True @@ -197,13 +204,14 @@ class TrafficSector(Sector): :type path: str :return: ... - :rtype: Pandas.DataFrame + :rtype: DataFrame """ spent_time = timeit.default_timer() df = pd.read_csv(path, sep=',', dtype=np.float32) df['P_speed'] = df['P_speed'].astype(int) - # df.set_index('P_speed', inplace=True) + df.set_index('P_speed', inplace=True) + self.logger.write_time_log('TrafficSector', 'read_speed_hourly', timeit.default_timer() - spent_time) return df @@ -224,11 +232,11 @@ class TrafficSector(Sector): min_num = nprocs - max_num index_list = [] prev = 0 - for i in xrange(max_num): + for i in range(max_num): prev += max_len index_list.append(prev) if min_num > 0: - for i in xrange(min_num - 1): + for i in range(min_num - 1): prev += min_len index_list.append(prev) @@ -245,32 +253,35 @@ class TrafficSector(Sector): if self.comm.Get_rank() == 0: df = gpd.read_file(path) - + df.drop(columns=['Adminis', 'CCAA', 'NETWORK_ID', 'Province', 'Road_name', 'aadt_m_sat', 'aadt_m_sun', + 'aadt_m_wd', 'Source'], inplace=True) + libc.malloc_trim(0) df = gpd.sjoin(df, self.clip.shapefile.to_crs(df.crs), how="inner", op='intersects') + df.drop(columns=['index_right', 'FID'], inplace=True) + libc.malloc_trim(0) + # Filter road links on the 'CONS' flag. 
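+ # 'CONS' == 0 marks road links that must not be considered; links without traffic (aadt == 0) or with a 'NULL' monthly profile are dropped as well.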
df['CONS'] = df['CONS'].astype(np.int16) df = df[df['CONS'] != 0] df = df[df['aadt'] > 0] - # TODO Manu update shapefile replacing NULL values on 'aadt_m-mn' column + df.drop(columns=['CONS'], inplace=True) df = df.loc[df['aadt_m_mn'] != 'NULL', :] + libc.malloc_trim(0) # Adding identifier of road link - df['Link_ID'] = xrange(len(df)) - - del df['Adminis'], df['CCAA'], df['CONS'], df['NETWORK_ID'] - del df['Province'], df['Road_name'] - - # Deleting unused columns - del df['aadt_m_sat'], df['aadt_m_sun'], df['aadt_m_wd'], df['Source'] + df['Link_ID'] = range(len(df)) + df.set_index('Link_ID', inplace=True) libc.malloc_trim(0) + chunks = chunk_road_links(df, self.comm.Get_size()) else: chunks = None self.comm.Barrier() df = self.comm.scatter(chunks, root=0) + del chunks libc.malloc_trim(0) df = df.to_crs({'init': 'epsg:4326'}) @@ -290,13 +301,11 @@ class TrafficSector(Sector): df.loc[df['Road_type'] == '2', 'Road_type'] = 'Urban Off Peak' df.loc[df['Road_type'] == '3', 'Road_type'] = 'Urban Peak' - # TODO Read with units types - df['road_grad'] = df['road_grad'].astype(float) + df['road_grad'] = df['road_grad'].astype(np.float32) # Check if percents are ok if len(df[df['PcLight'] < 0]) != 0: - print 'ERROR: PcLight < 0' - exit(1) + raise ValueError('ERROR: PcLight < 0') if self.write_rline: self.write_rline_roadlinks(df) @@ -347,7 +356,7 @@ class TrafficSector(Sector): # Checks that the split DataFrames contain the full DataFrame if (len(df_code_slope_road) + len(df_code_slope) + len(df_code_road) + len(df_code)) != len(df): # TODO check that error - print 'ERROR in blablavbla' + raise ValueError('The split EF DataFrames do not cover the full EF DataFrame.') return df_code_slope_road, df_code_slope, df_code_road, df_code elif emission_type == 'cold' or emission_type == 'tyre' or emission_type == 'road' or \ @@ -404,7 +413,7 @@ class TrafficSector(Sector): prlr = prlr <= MIN_RAIN dst = np.empty(prlr.shape) last = np.zeros((prlr.shape[-1])) - for time in xrange(prlr.shape[0]): + for time in range(prlr.shape[0]): dst[time, :] = (last + prlr[time, :]) * prlr[time, :] last = dst[time, :] @@ -425,13 +434,20 @@ class TrafficSector(Sector): def update_fleet_value(self, df): spent_time = timeit.default_timer() - # Calculating fleet value by fleet class - df.loc[:, 'Fleet_value'] = df['Fleet_value'] * df['aadt'] + def update_by_class(x): + if x.name == 'light_veh': + x['value'] = x['PcLight'].mul(x['Fleet_value'] * x['aadt'], axis='index') + elif x.name == 'heavy_veh': + x['value'] = x['PcHeavy'].mul(x['Fleet_value'] * x['aadt'], axis='index') + elif x.name == 'motos': + x['value'] = x['PcMoto'].mul(x['Fleet_value'] * x['aadt'], axis='index') + elif x.name == 'mopeds': + x['value'] = x['PcMoped'].mul(x['Fleet_value'] * x['aadt'], axis='index') + else: + x['value'] = np.nan + return x[['value']] - df.loc[df['Fleet_Class'] == 'light_veh', 'Fleet_value'] = df['PcLight'] * df['Fleet_value'] - df.loc[df['Fleet_Class'] == 'heavy_veh', 'Fleet_value'] = df['PcHeavy'] * df['Fleet_value'] - df.loc[df['Fleet_Class'] == 'motos', 'Fleet_value'] = df['PcMoto'] * df['Fleet_value'] - df.loc[df['Fleet_Class'] == 'mopeds', 'Fleet_value'] = df['PcMoped'] * df['Fleet_value'] + df['Fleet_value'] = df.groupby('Fleet_Class').apply(update_by_class) for link_id, aux_df in df.groupby('Link_ID'): aadt = round(aux_df['aadt'].min(), 1) @@ -444,143 +460,133 @@ class TrafficSector(Sector): df = df[df['Fleet_value'] > 0] # Deleting unused columns - del df['aadt'], df['PcLight'], df['PcHeavy'], df['PcMoto'], df['PcMoped'], df['Fleet_Class'] + df.drop(columns=['aadt', 'PcLight', 'PcHeavy', 
'PcMoto', 'PcMoped', 'Fleet_Class'], inplace=True) + libc.malloc_trim(0) + self.logger.write_time_log('TrafficSector', 'update_fleet_value', timeit.default_timer() - spent_time) return df - def calculate_timedelta(self, timestep_type, num_tstep, timestep_freq): - from datetime import timedelta - spent_time = timeit.default_timer() - - delta = timedelta(hours=timestep_freq * num_tstep) - - self.logger.write_time_log('TrafficSector', 'calculate_timedelta', timeit.default_timer() - spent_time) - return pd.Timedelta(delta) - - def calculate_hourly_speed(self, df): + def calculate_time_dependent_values(self, df): spent_time = timeit.default_timer() - df = df.merge(self.speed_hourly, left_on='profile_id', right_on='P_speed', how='left') - df['speed'] = df.groupby('hour').apply(lambda x: x[[str(x.name)]]) - - self.logger.write_time_log('TrafficSector', 'calculate_hourly_speed', timeit.default_timer() - spent_time) - return df['speed'] * df['speed_mean'] - - def calculate_temporal_factor(self, df): - spent_time = timeit.default_timer() - - def get_hourly_id_from_weekday(weekday): - if weekday <= 4: - return 'aadt_h_wd' - elif weekday == 5: - return 'aadt_h_sat' - elif weekday == 6: - return 'aadt_h_sun' + def get_weekday_speed_profile(x): + # Speed mean + if x.name <= 4: + x['speed_mean'] = x['sp_wd'] else: - print 'ERROR: Weekday not found' - exit() - - # Monthly factor - df = df.merge(self.monthly_profiles.reset_index(), left_on='aadt_m_mn', right_on='P_month', how='left') - df['MF'] = df.groupby('month').apply(lambda x: x[[x.name]]) - df.drop(columns=range(1, 12 + 1), inplace=True) - - # Daily factor - df = df.merge(self.weekly_profiles.reset_index(), left_on='aadt_week', right_on='P_week', how='left') - - df['WF'] = df.groupby('week_day').apply(lambda x: x[[x.name]]) - df.drop(columns=range(0, 7), inplace=True) + x['speed_mean'] = x['sp_we'] + + # Profile_ID + if x.name == 0: + x['P_speed'] = x['sp_hour_mo'] + elif x.name == 1: + x['P_speed'] = x['sp_hour_tu'] + elif x.name == 2: + x['P_speed'] = x['sp_hour_we'] + elif x.name == 3: + x['P_speed'] = x['sp_hour_th'] + elif x.name == 4: + x['P_speed'] = x['sp_hour_fr'] + elif x.name == 5: + x['P_speed'] = x['sp_hour_sa'] + elif x.name == 6: + x['P_speed'] = x['sp_hour_su'] + else: + x['P_speed'] = 1 # Flat profile - # Hourly factor - df['hourly_profile'] = df.groupby('week_day').apply(lambda x: x[[get_hourly_id_from_weekday(x.name)]]) - df.loc[df['hourly_profile'] == '', 'hourly_profile'] = df['aadt_h_mn'] + # Use the flat profile for zeros and NaNs + x['P_speed'] = x['P_speed'].replace([0, np.nan], 1) + x['P_speed'] = x['P_speed'].astype(int) - df['hourly_profile'] = df['hourly_profile'].astype(str) - self.hourly_profiles['P_hour'] = self.hourly_profiles['P_hour'].astype(str) + return x[['speed_mean', 'P_speed']] - df = df.merge(self.hourly_profiles, left_on='hourly_profile', right_on='P_hour', how='left') - df['HF'] = df.groupby('hour').apply(lambda x: x[[str(x.name)]]) + def get_velocity(x): + speed = self.speed_hourly.loc[np.unique(x['P_speed'].values), :] - self.logger.write_time_log('TrafficSector', 'calculate_temporal_factor', timeit.default_timer() - spent_time) - return df['MF'] * df['WF'] * df['HF'] + x = pd.merge(x, speed, left_on='P_speed', right_index=True, how='left') + x['speed'] = x.groupby('hour').apply(lambda y: y[[str(y.name)]]) - def calculate_time_dependent_values(self, df, timestep_type, timestep_num, timestep_freq): - spent_time = timeit.default_timer() + return x[['speed']] - df.reset_index(inplace=True) - for tstep in xrange(timestep_num): - # 
Finding weekday - # 0 -> Monday; 6 -> Sunday - df.loc[:, 'month'] = (df['start_date'] + self.calculate_timedelta( - timestep_type, tstep, timestep_freq)).dt.month - df.loc[:, 'week_day'] = (df['start_date'] + self.calculate_timedelta( - timestep_type, tstep, timestep_freq)).dt.weekday - df.loc[:, 'hour'] = (df['start_date'] + self.calculate_timedelta( - timestep_type, tstep, timestep_freq)).dt.hour - - # Selecting speed_mean - df.loc[df['week_day'] <= 4, 'speed_mean'] = df['sp_wd'] - df.loc[df['week_day'] > 4, 'speed_mean'] = df['sp_we'] - - # Selecting speed profile_id - df.loc[df['week_day'] == 0, 'profile_id'] = df['sp_hour_mo'] - df.loc[df['week_day'] == 1, 'profile_id'] = df['sp_hour_tu'] - df.loc[df['week_day'] == 2, 'profile_id'] = df['sp_hour_we'] - df.loc[df['week_day'] == 3, 'profile_id'] = df['sp_hour_th'] - df.loc[df['week_day'] == 4, 'profile_id'] = df['sp_hour_fr'] - df.loc[df['week_day'] == 5, 'profile_id'] = df['sp_hour_sa'] - df.loc[df['week_day'] == 6, 'profile_id'] = df['sp_hour_su'] - - df['profile_id'] = df['profile_id'].astype(int) - - # Selecting flat profile for 0 and nan's - df.loc[df['profile_id'] == 0, 'profile_id'] = 1 - df.loc[df['profile_id'] == np.nan, 'profile_id'] = 1 - - # Calculating speed by tstep - speed_column_name = 'v_{0}'.format(tstep) - df[speed_column_name] = self.calculate_hourly_speed(df.loc[:, ['hour', 'speed_mean', 'profile_id']]) - - factor_column_name = 'f_{0}'.format(tstep) - - df.loc[:, factor_column_name] = self.calculate_temporal_factor( - df.loc[:, ['month', 'week_day', 'hour', 'aadt_m_mn', 'aadt_week', 'aadt_h_mn', 'aadt_h_wd', - 'aadt_h_sat', 'aadt_h_sun']]) - - # Deleting time variables - - del df['month'], df['week_day'], df['hour'], df['profile_id'], df['speed_mean'] - del df['sp_wd'], df['sp_we'], df['index'] - del df['sp_hour_mo'], df['sp_hour_tu'], df['sp_hour_we'], df['sp_hour_th'], df['sp_hour_fr'] - del df['sp_hour_sa'], df['sp_hour_su'] - del df['aadt_m_mn'], df['aadt_h_mn'], df['aadt_h_wd'], df['aadt_h_sat'], df['aadt_h_sun'], df['aadt_week'] - del df['start_date'] + def get_temporal_factor(x): + def get_hourly_id_from_weekday(weekday): + if weekday <= 4: + return 'aadt_h_wd' + elif weekday == 5: + return 'aadt_h_sat' + elif weekday == 6: + return 'aadt_h_sun' + else: + raise KeyError('ERROR: Weekday not found') + + # Monthly factor + x = pd.merge(x, self.monthly_profiles, left_on='aadt_m_mn', right_index=True, how='left') + x['MF'] = x.groupby('month').apply(lambda y: y[[y.name]]) + x.drop(columns=range(1, 12 + 1), inplace=True) + + # Daily factor + x = pd.merge(x, self.weekly_profiles, left_on='aadt_week', right_index=True, how='left') + x['WF'] = x.groupby('weekday').apply(lambda y: y[[y.name]]) + x.drop(columns=range(0, 7), inplace=True) + + # Hourly factor + x.fillna(value=np.nan, inplace=True) + x['hourly_profile'] = x.groupby('weekday').apply(lambda y: y[[get_hourly_id_from_weekday(y.name)]]) + x['hourly_profile'] = x['hourly_profile'].fillna(x['aadt_h_mn']) + + x = pd.merge(x, self.hourly_profiles, left_on='hourly_profile', right_index=True, how='left') + x['HF'] = x.groupby('hour').apply(lambda y: y[[y.name]]) + x.drop(columns=range(0, 24), inplace=True) + x['factor'] = x['MF'] * x['WF'] * x['HF'] + + return x[['factor']] + + for i_t, tstep in enumerate(self.date_array): + df['aux_date'] = df['start_date'] + (tstep - self.date_array[0]) + df['month'] = df['aux_date'].dt.month + df['weekday'] = df['aux_date'].dt.weekday + df['hour'] = df['aux_date'].dt.hour + + df[['speed_mean', 'P_speed']] = 
df.groupby('weekday').apply(get_weekday_speed_profile) + + df['v_{0}'.format(i_t)] = get_velocity(df[['hour', 'speed_mean', 'P_speed']]) + df['f_{0}'.format(i_t)] = get_temporal_factor( + df[['month', 'weekday', 'hour', 'aadt_m_mn', 'aadt_week', 'aadt_h_mn', 'aadt_h_wd', 'aadt_h_sat', + 'aadt_h_sun']]) + + df.drop(columns=['month', 'weekday', 'hour', 'P_speed', 'speed_mean', 'sp_wd', 'sp_we', 'sp_hour_mo', + 'sp_hour_tu', 'sp_hour_we', 'sp_hour_th', 'sp_hour_fr', 'sp_hour_sa', 'sp_hour_su', 'aux_date', + 'aadt_m_mn', 'aadt_h_mn', 'aadt_h_wd', 'aadt_h_sat', 'aadt_h_sun', 'aadt_week', 'start_date'], + inplace=True) + libc.malloc_trim(0) self.logger.write_time_log('TrafficSector', 'calculate_time_dependent_values', timeit.default_timer() - spent_time) return df - def expand_road_links(self, timestep_type, timestep_num, timestep_freq): + def expand_road_links(self): spent_time = timeit.default_timer() # Expands each road link by any vehicle type that the selected road link has. df_list = [] - road_link_aux = self.road_links.copy() + road_link_aux = self.road_links.copy().reset_index() + + road_link_aux.drop(columns='geometry', inplace=True) + libc.malloc_trim(0) - del road_link_aux['geometry'] for zone, compo_df in road_link_aux.groupby('fleet_comp'): fleet = self.find_fleet(zone) df_aux = pd.merge(compo_df, fleet, how='left', on='fleet_comp') + df_aux.drop(columns='fleet_comp', inplace=True) df_list.append(df_aux) df = pd.concat(df_list, ignore_index=True) - libc.malloc_trim(0) - del df['fleet_comp'] + df.set_index(['Link_ID', 'Fleet_Code'], inplace=True) + libc.malloc_trim(0) df = self.update_fleet_value(df) - df = self.calculate_time_dependent_values(df, timestep_type, timestep_num, timestep_freq) + df = self.calculate_time_dependent_values(df) self.logger.write_time_log('TrafficSector', 'expand_road_links', timeit.default_timer() - spent_time) @@ -612,6 +618,7 @@ class TrafficSector(Sector): if pollutant != 'nh3': ef_code_slope_road, ef_code_slope, ef_code_road, ef_code = self.read_ef('hot', pollutant) + df_code_slope_road = expanded_aux.merge( ef_code_slope_road, left_on=['Fleet_Code', 'road_grad', 'Road_type'], right_on=['Code', 'Road.Slope', 'Mode'], how='inner') @@ -634,8 +641,8 @@ class TrafficSector(Sector): del expanded_aux['Code'], expanded_aux['Mode'] # Warnings and Errors - original_ef_profile = self.expanded['Fleet_Code'].unique() - calculated_ef_profiles = expanded_aux['Fleet_Code'].unique() + original_ef_profile = np.unique(self.expanded.index.get_level_values('Fleet_Code')) + calculated_ef_profiles = np.unique(expanded_aux['Fleet_Code']) resta_1 = [item for item in original_ef_profile if item not in calculated_ef_profiles] # Warning resta_2 = [item for item in calculated_ef_profiles if item not in original_ef_profile] # Error @@ -649,9 +656,9 @@ class TrafficSector(Sector): m_corr = self.read_mcorr_file(pollutant) if m_corr is not None: expanded_aux = expanded_aux.merge(m_corr, left_on='Fleet_Code', right_on='Code', how='left') - del expanded_aux['Code'] + expanded_aux.drop(columns=['Code'], inplace=True) - for tstep in xrange(self.timestep_num): + for tstep in range(len(self.date_array)): ef_name = 'ef_{0}_{1}'.format(pollutant, tstep) p_column = '{0}_{1}'.format(pollutant, tstep) if pollutant != 'nh3': @@ -662,14 +669,14 @@ expanded_aux['v_aux'] > expanded_aux['Max.Speed'], 'Max.Speed'] # EF - expanded_aux.loc[:, ef_name] = \ + expanded_aux[ef_name] = \ ((expanded_aux.Alpha * expanded_aux.v_aux**2 + expanded_aux.Beta * expanded_aux.v_aux + 
expanded_aux.Gamma + (expanded_aux.Delta / expanded_aux.v_aux)) / (expanded_aux.Epsilon * expanded_aux.v_aux**2 + expanded_aux.Zita * expanded_aux.v_aux + expanded_aux.Hta)) * (1 - expanded_aux.RF) * \ (expanded_aux.PF * expanded_aux['T'] / expanded_aux.Q) else: - expanded_aux.loc[:, ef_name] = \ + expanded_aux[ef_name] = \ ((expanded_aux['a'] * expanded_aux['Cmileage'] + expanded_aux['b']) * (expanded_aux['EFbase'] * expanded_aux['TF'])) / 1000 @@ -692,33 +699,31 @@ class TrafficSector(Sector): expanded_aux.loc[:, p_column] = \ expanded_aux['Fleet_value'] * expanded_aux[ef_name] * expanded_aux['Mcorr'] * \ expanded_aux['f_{0}'.format(tstep)] - del expanded_aux[ef_name], expanded_aux['Mcorr'] + expanded_aux.drop(columns=[ef_name, 'Mcorr'], inplace=True) if pollutant != 'nh3': - del expanded_aux['v_aux'] - del expanded_aux['Min.Speed'], expanded_aux['Max.Speed'], expanded_aux['Alpha'], expanded_aux['Beta'] - del expanded_aux['Gamma'], expanded_aux['Delta'], expanded_aux['Epsilon'], expanded_aux['Zita'] - del expanded_aux['Hta'], expanded_aux['RF'], expanded_aux['Q'], expanded_aux['PF'], expanded_aux['T'] + expanded_aux.drop(columns=['v_aux', 'Min.Speed', 'Max.Speed', 'Alpha', 'Beta', 'Gamma', 'Delta', + 'Epsilon', 'Zita', 'Hta', 'RF', 'Q', 'PF', 'T'], inplace=True) else: - del expanded_aux['a'], expanded_aux['Cmileage'], expanded_aux['b'], expanded_aux['EFbase'] - del expanded_aux['TF'] + expanded_aux.drop(columns=['a', 'Cmileage', 'b', 'EFbase', 'TF'], inplace=True) if m_corr is not None: - del expanded_aux['A_urban'], expanded_aux['B_urban'], expanded_aux['A_road'], expanded_aux['B_road'] - del expanded_aux['M'] - - del expanded_aux['road_grad'] + expanded_aux.drop(columns=['A_urban', 'B_urban', 'A_road', 'B_road', 'M'], inplace=True) + expanded_aux.drop(columns=['road_grad'], inplace=True) + expanded_aux.drop(columns=['f_{0}'.format(x) for x in range(len(self.date_array))], inplace=True) - for tstep in xrange(self.timestep_num): - del expanded_aux['f_{0}'.format(tstep)] + libc.malloc_trim(0) self.logger.write_time_log('TrafficSector', 'calculate_hot', timeit.default_timer() - spent_time) return expanded_aux def calculate_cold(self, hot_expanded): spent_time = timeit.default_timer() - cold_links = self.road_links.copy() + cold_links = self.road_links.copy().reset_index() del cold_links['aadt'], cold_links['PcHeavy'], cold_links['PcMoto'], cold_links['PcMoped'], cold_links['sp_wd'] del cold_links['sp_we'], cold_links['sp_hour_su'], cold_links['sp_hour_mo'], cold_links['sp_hour_tu'] @@ -728,17 +733,17 @@ class TrafficSector(Sector): del cold_links['road_grad'], cold_links['PcLight'], cold_links['start_date'] libc.malloc_trim(0) - cold_links.loc[:, 'centroid'] = cold_links['geometry'].centroid + cold_links['centroid'] = cold_links['geometry'].centroid link_lons = cold_links['geometry'].centroid.x link_lats = cold_links['geometry'].centroid.y temperature = IoNetcdf(self.comm).get_hourly_data_from_netcdf( link_lons.min(), link_lons.max(), link_lats.min(), link_lats.max(), self.temp_common_path, 'tas', self.date_array) - temperature.rename(columns={x: 't_{0}'.format(x) for x in xrange(len(self.date_array))}, inplace=True) + temperature.rename(columns={x: 't_{0}'.format(x) for x in range(len(self.date_array))}, inplace=True) # From Kelvin to Celsius degrees - temperature.loc[:, ['t_{0}'.format(x) for x in xrange(len(self.date_array))]] = \ - temperature.loc[:, ['t_{0}'.format(x) for 
x in xrange(len(self.date_array))]] - 273.15 + temperature[['t_{0}'.format(x) for x in range(len(self.date_array))]] = \ + temperature[['t_{0}'.format(x) for x in range(len(self.date_array))]] - 273.15 unary_union = temperature.unary_union cold_links['REC'] = cold_links.apply(self.nearest, geom_union=unary_union, df1=cold_links, df2=temperature, @@ -767,11 +772,10 @@ class TrafficSector(Sector): right_on=['Code', 'Mode'], how='inner') cold_exp_p_aux = c_expanded_p.copy() - del cold_exp_p_aux['index_right_x'], cold_exp_p_aux['Road_type'], cold_exp_p_aux['Fleet_value'] - del cold_exp_p_aux['Code'] + cold_exp_p_aux.drop(columns=['Road_type', 'Fleet_value', 'Code'], inplace=True) libc.malloc_trim(0) - for tstep in xrange(self.timestep_num): + for tstep in range(len(self.date_array)): v_column = 'v_{0}'.format(tstep) p_column = '{0}_{1}'.format(pollutant, tstep) t_column = 't_{0}'.format(tstep) @@ -782,16 +786,16 @@ class TrafficSector(Sector): cold_exp_p_aux = cold_exp_p_aux.loc[cold_exp_p_aux[v_column] < cold_exp_p_aux['Max.Speed'], :] # Beta - cold_exp_p_aux.loc[:, 'Beta'] = \ + cold_exp_p_aux['Beta'] = \ (0.6474 - (0.02545 * cold_exp_p_aux['ltrip']) - (0.00974 - (0.000385 * cold_exp_p_aux['ltrip'])) * cold_exp_p_aux[t_column]) * cold_exp_p_aux['bc'] if pollutant != 'nh3': - cold_exp_p_aux.loc[:, 'cold_hot'] = \ + cold_exp_p_aux['cold_hot'] = \ cold_exp_p_aux['A'] * cold_exp_p_aux[v_column] + cold_exp_p_aux['B'] * \ cold_exp_p_aux[t_column] + cold_exp_p_aux['C'] else: - cold_exp_p_aux.loc[:, 'cold_hot'] = \ + cold_exp_p_aux['cold_hot'] = \ ((cold_exp_p_aux['a'] * cold_exp_p_aux['Cmileage'] + cold_exp_p_aux['b']) * cold_exp_p_aux['EFbase'] * cold_exp_p_aux['TF']) / \ ((cold_exp_p_aux['a_hot'] * cold_exp_p_aux['Cmileage'] + cold_exp_p_aux['b_hot']) * @@ -799,9 +803,9 @@ class TrafficSector(Sector): cold_exp_p_aux.loc[cold_exp_p_aux['cold_hot'] < 1, 'cold_hot'] = 1 # Formula Cold emissions - cold_exp_p_aux.loc[:, p_column] = \ + cold_exp_p_aux[p_column] = \ cold_exp_p_aux[p_column] * cold_exp_p_aux['Beta'] * (cold_exp_p_aux['cold_hot'] - 1) - df_list.append((cold_exp_p_aux.loc[:, ['Link_ID', 'Fleet_Code', p_column]]).set_index( + df_list.append((cold_exp_p_aux[['Link_ID', 'Fleet_Code', p_column]]).set_index( ['Link_ID', 'Fleet_Code'])) try: @@ -819,14 +823,14 @@ class TrafficSector(Sector): error_fleet_code.append(o) raise IndexError('There are duplicated values for {0} codes in the cold EF files.'.format(error_fleet_code)) - for tstep in xrange(self.timestep_num): + for tstep in range(len(self.date_array)): if 'pm' in self.source_pollutants: - cold_df.loc[:, 'pm10_{0}'.format(tstep)] = cold_df['pm_{0}'.format(tstep)] - cold_df.loc[:, 'pm25_{0}'.format(tstep)] = cold_df['pm_{0}'.format(tstep)] + cold_df['pm10_{0}'.format(tstep)] = cold_df['pm_{0}'.format(tstep)] + cold_df['pm25_{0}'.format(tstep)] = cold_df['pm_{0}'.format(tstep)] del cold_df['pm_{0}'.format(tstep)] libc.malloc_trim(0) if 'voc' in self.source_pollutants and 'ch4' in self.source_pollutants: - cold_df.loc[:, 'nmvoc_{0}'.format(tstep)] = \ + cold_df['nmvoc_{0}'.format(tstep)] = \ cold_df['voc_{0}'.format(tstep)] - cold_df['ch4_{0}'.format(tstep)] del cold_df['voc_{0}'.format(tstep)] libc.malloc_trim(0) @@ -844,19 +848,19 @@ class TrafficSector(Sector): def compact_hot_expanded(self, expanded): spent_time = timeit.default_timer() - columns_to_delete = ['Road_type', 'Fleet_value'] + ['v_{0}'.format(x) for x in xrange(self.timestep_num)] + columns_to_delete = ['Road_type', 'Fleet_value'] + ['v_{0}'.format(x) for x in 
range(len(self.date_array))] for column_name in columns_to_delete: del expanded[column_name] - for tstep in xrange(self.timestep_num): + for tstep in range(len(self.date_array)): if 'pm' in self.source_pollutants: - expanded.loc[:, 'pm10_{0}'.format(tstep)] = expanded['pm_{0}'.format(tstep)] - expanded.loc[:, 'pm25_{0}'.format(tstep)] = expanded['pm_{0}'.format(tstep)] + expanded['pm10_{0}'.format(tstep)] = expanded['pm_{0}'.format(tstep)] + expanded['pm25_{0}'.format(tstep)] = expanded['pm_{0}'.format(tstep)] del expanded['pm_{0}'.format(tstep)] if 'voc' in self.source_pollutants and 'ch4' in self.source_pollutants: - expanded.loc[:, 'nmvoc_{0}'.format(tstep)] = expanded['voc_{0}'.format(tstep)] - \ - expanded['ch4_{0}'.format(tstep)] + expanded['nmvoc_{0}'.format(tstep)] = expanded['voc_{0}'.format(tstep)] - \ + expanded['ch4_{0}'.format(tstep)] del expanded['voc_{0}'.format(tstep)] else: self.logger.write_log("nmvoc emissions cannot be estimated because voc or ch4 are not selected in " + @@ -875,9 +879,10 @@ class TrafficSector(Sector): pollutants = ['pm'] for pollutant in pollutants: ef_tyre = self.read_ef('tyre', pollutant) - df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') - del df['road_grad'], df['Road_type'], df['Code'] - for tstep in xrange(self.timestep_num): + df = pd.merge(self.expanded.reset_index(), ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') + df.drop(columns=['road_grad', 'Road_type', 'Code'], inplace=True) + + for tstep in range(len(self.date_array)): p_column = '{0}_{1}'.format(pollutant, tstep) f_column = 'f_{0}'.format(tstep) v_column = 'v_{0}'.format(tstep) @@ -888,16 +893,15 @@ class TrafficSector(Sector): # from PM to PM10 & PM2.5 if pollutant == 'pm': - df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] * 0.6 - df.loc[:, 'pm25_{0}'.format(tstep)] = df[p_column] * 0.42 + df['pm10_{0}'.format(tstep)] = df[p_column] * 0.6 + df['pm25_{0}'.format(tstep)] = df[p_column] * 0.42 del df[p_column] # Cleaning df - columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in xrange( - self.timestep_num)] - columns_to_delete += ['Fleet_value', 'EFbase'] - for column in columns_to_delete: - del df[column] + columns_to_delete = ['f_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['v_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['Fleet_value', 'EFbase'] + df.drop(columns=columns_to_delete, inplace=True) df = self.speciate_traffic(df, self.tyre_speciation) self.logger.write_time_log('TrafficSector', 'calculate_tyre_wear', timeit.default_timer() - spent_time) @@ -909,9 +913,9 @@ class TrafficSector(Sector): pollutants = ['pm'] for pollutant in pollutants: ef_tyre = self.read_ef('brake', pollutant) - df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') - del df['road_grad'], df['Road_type'], df['Code'] - for tstep in xrange(self.timestep_num): + df = pd.merge(self.expanded.reset_index(), ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') + df.drop(columns=['road_grad', 'Road_type', 'Code'], inplace=True) + for tstep in range(len(self.date_array)): p_column = '{0}_{1}'.format(pollutant, tstep) f_column = 'f_{0}'.format(tstep) v_column = 'v_{0}'.format(tstep) @@ -927,11 +931,11 @@ class TrafficSector(Sector): del df[p_column] # Cleaning df - columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in xrange( - self.timestep_num)] - columns_to_delete += ['Fleet_value', 'EFbase'] - 
for column in columns_to_delete: - del df[column] + columns_to_delete = ['f_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['v_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['Fleet_value', 'EFbase'] + df.drop(columns=columns_to_delete, inplace=True) + libc.malloc_trim(0) df = self.speciate_traffic(df, self.brake_speciation) @@ -944,9 +948,9 @@ class TrafficSector(Sector): pollutants = ['pm'] for pollutant in pollutants: ef_tyre = self.read_ef('road', pollutant) - df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') - del df['road_grad'], df['Road_type'], df['Code'] - for tstep in xrange(self.timestep_num): + df = pd.merge(self.expanded.reset_index(), ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') + df.drop(columns=['road_grad', 'Road_type', 'Code'], inplace=True) + for tstep in range(len(self.date_array)): p_column = '{0}_{1}'.format(pollutant, tstep) f_column = 'f_{0}'.format(tstep) df.loc[:, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] @@ -958,11 +962,10 @@ class TrafficSector(Sector): del df[p_column] # Cleaning df - columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in xrange( - self.timestep_num)] - columns_to_delete += ['Fleet_value', 'EFbase'] - for column in columns_to_delete: - del df[column] + columns_to_delete = ['f_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['v_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['Fleet_value', 'EFbase'] + df.drop(columns=columns_to_delete, inplace=True) df = self.speciate_traffic(df, self.road_speciation) @@ -973,9 +976,9 @@ class TrafficSector(Sector): spent_time = timeit.default_timer() if self.resuspension_correction: - road_link_aux = self.road_links.loc[:, ['Link_ID', 'geometry']].copy() + road_link_aux = self.road_links[['geometry']].copy().reset_index() - road_link_aux.loc[:, 'centroid'] = road_link_aux['geometry'].centroid + road_link_aux['centroid'] = road_link_aux['geometry'].centroid link_lons = road_link_aux['geometry'].centroid.x link_lats = road_link_aux['geometry'].centroid.y @@ -994,33 +997,32 @@ class TrafficSector(Sector): pollutants = ['pm'] for pollutant in pollutants: ef_tyre = self.read_ef('resuspension', pollutant) - df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') + df = pd.merge(self.expanded.reset_index(), ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') if self.resuspension_correction: df = df.merge(road_link_aux, left_on='Link_ID', right_on='Link_ID', how='left') del df['road_grad'], df['Road_type'], df['Code'] - for tstep in xrange(self.timestep_num): + for tstep in range(len(self.date_array)): p_column = '{0}_{1}'.format(pollutant, tstep) f_column = 'f_{0}'.format(tstep) if self.resuspension_correction: pr_column = 'PR_{0}'.format(tstep) - df.loc[:, p_column] = df['Fleet_value'] * df['EFbase'] * df[pr_column] * df[f_column] + df[p_column] = df['Fleet_value'] * df['EFbase'] * df[pr_column] * df[f_column] else: - df.loc[:, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] + df[p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] # from PM to PM10 & PM2.5 if pollutant == 'pm': - df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] + df['pm10_{0}'.format(tstep)] = df[p_column] # TODO Check fraction of pm2.5 - df.loc[:, 'pm25_{0}'.format(tstep)] = df[p_column] * 0.5 + df['pm25_{0}'.format(tstep)] = df[p_column] * 0.5 del df[p_column] # Cleaning df - columns_to_delete = ['f_{0}'.format(x) for x in 
xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in - xrange(self.timestep_num)] - columns_to_delete += ['Fleet_value', 'EFbase'] - for column in columns_to_delete: - del df[column] + columns_to_delete = ['f_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['v_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['Fleet_value', 'EFbase'] + df.drop(columns=columns_to_delete, inplace=True) df = self.speciate_traffic(df, self.resuspension_speciation) @@ -1031,7 +1033,7 @@ class TrafficSector(Sector): spent_time = timeit.default_timer() df_list = [] - for tstep in xrange(self.timestep_num): + for tstep in range(len(self.date_array)): pollutants_to_rename = [p for p in list(df.columns.values) if p.endswith('_{0}'.format(tstep))] pollutants_renamed = [] for p_name in pollutants_to_rename: @@ -1039,7 +1041,7 @@ class TrafficSector(Sector): df.rename(columns={p_name: p_name_new}, inplace=True) pollutants_renamed.append(p_name_new) - df_aux = pd.DataFrame(df.loc[:, ['Link_ID', 'Fleet_Code'] + pollutants_renamed]) + df_aux = df[['Link_ID', 'Fleet_Code'] + pollutants_renamed].copy() df_aux['tstep'] = tstep df_list.append(df_aux) @@ -1071,28 +1073,28 @@ class TrafficSector(Sector): # PMC if not set(speciation.columns.values).isdisjoint(pmc_list): out_p = set(speciation.columns.values).intersection(pmc_list).pop() - speciation_by_in_p = speciation.loc[:, [out_p] + ['Code']] + speciation_by_in_p = speciation[[out_p] + ['Code']] speciation_by_in_p.rename(columns={out_p: 'f_{0}'.format(out_p)}, inplace=True) - df_aux = df.loc[:, ['pm10', 'pm25', 'Fleet_Code', 'tstep', 'Link_ID']] + df_aux = df[['pm10', 'pm25', 'Fleet_Code', 'tstep', 'Link_ID']] df_aux = df_aux.merge(speciation_by_in_p, left_on='Fleet_Code', right_on='Code', how='left') df_aux.drop(columns=['Code'], inplace=True) - df_aux.loc[:, out_p] = df_aux['pm10'] - df_aux['pm25'] + df_aux[out_p] = df_aux['pm10'] - df_aux['pm25'] + + df_out_list.append(df_aux[[out_p] + ['tstep', 'Link_ID']].groupby(['tstep', 'Link_ID']).sum()) - df_out_list.append(df_aux.loc[:, [out_p] + ['tstep', 'Link_ID']].groupby(['tstep', 'Link_ID']).sum()) - del df_aux[out_p] for in_p in in_list: - involved_out_pollutants = [key for key, value in self.speciation_map.iteritems() if value == in_p] + involved_out_pollutants = [key for key, value in self.speciation_map.items() if value == in_p] # Selecting only necessary speciation profiles - speciation_by_in_p = speciation.loc[:, involved_out_pollutants + ['Code']] + speciation_by_in_p = speciation[involved_out_pollutants + ['Code']] # Adding "f_" in the formula column names for p in involved_out_pollutants: speciation_by_in_p.rename(columns={p: 'f_{0}'.format(p)}, inplace=True) # Getting a slice of the full dataset to be merged - df_aux = df.loc[:, [in_p] + ['Fleet_Code', 'tstep', 'Link_ID']] + df_aux = df[[in_p] + ['Fleet_Code', 'tstep', 'Link_ID']] df_aux = df_aux.merge(speciation_by_in_p, left_on='Fleet_Code', right_on='Code', how='left') df_aux.drop(columns=['Code'], inplace=True) @@ -1101,7 +1103,7 @@ class TrafficSector(Sector): for p in involved_out_pollutants: if in_p is not np.nan: if in_p != 0: - df_aux.loc[:, p] = df_aux['old_{0}'.format(in_p)].multiply(df_aux['f_{0}'.format(p)]) + df_aux[p] = df_aux['old_{0}'.format(in_p)].multiply(df_aux['f_{0}'.format(p)]) try: if in_p == 'nmvoc': mol_w = 1.0 @@ -1110,13 +1112,12 @@ class TrafficSector(Sector): except KeyError: raise AttributeError('{0} not found in the molecular weights file.'.format(in_p)) # from g/km.h to mol/km.h or g/km.h (aerosols) - 
df_aux.loc[:, p] = df_aux.loc[:, p] / mol_w + df_aux[p] = df_aux.loc[:, p] / mol_w else: df_aux.loc[:, p] = 0 - df_out_list.append(df_aux.loc[:, [p] + ['tstep', 'Link_ID']].groupby(['tstep', 'Link_ID']).sum()) - del df_aux[p] + df_out_list.append(df_aux[[p] + ['tstep', 'Link_ID']].groupby(['tstep', 'Link_ID']).sum()) del df_aux del df[in_p] @@ -1161,22 +1162,24 @@ class TrafficSector(Sector): if self.do_tyre_wear: self.logger.write_log('\t\tCalculating Tyre wear emissions.', message_level=2) - df_accum = pd.concat([df_accum, self.calculate_tyre_wear()]).groupby(['tstep', 'Link_ID']).sum() + df_accum = pd.concat([df_accum, self.calculate_tyre_wear()], sort=False).groupby(['tstep', 'Link_ID']).sum() libc.malloc_trim(0) if self.do_brake_wear: self.logger.write_log('\t\tCalculating Brake wear emissions.', message_level=2) - df_accum = pd.concat([df_accum, self.calculate_brake_wear()]).groupby(['tstep', 'Link_ID']).sum() + df_accum = pd.concat([df_accum, self.calculate_brake_wear()], sort=False).groupby( + ['tstep', 'Link_ID']).sum() libc.malloc_trim(0) if self.do_road_wear: self.logger.write_log('\t\tCalculating Road wear emissions.', message_level=2) - df_accum = pd.concat([df_accum, self.calculate_road_wear()]).groupby(['tstep', 'Link_ID']).sum() + df_accum = pd.concat([df_accum, self.calculate_road_wear()], sort=False).groupby(['tstep', 'Link_ID']).sum() libc.malloc_trim(0) if self.do_resuspension: self.logger.write_log('\t\tCalculating Resuspension emissions.', message_level=2) - df_accum = pd.concat([df_accum, self.calculate_resuspension()]).groupby(['tstep', 'Link_ID']).sum() + df_accum = pd.concat([df_accum, self.calculate_resuspension()], sort=False).groupby( + ['tstep', 'Link_ID']).sum() libc.malloc_trim(0) - df_accum = df_accum.reset_index().merge(self.road_links.loc[:, ['Link_ID', 'geometry']], left_on='Link_ID', - right_on='Link_ID', how='left') + df_accum = df_accum.reset_index().merge(self.road_links.reset_index().loc[:, ['Link_ID', 'geometry']], + on='Link_ID', how='left') df_accum = gpd.GeoDataFrame(df_accum, crs=self.crs) libc.malloc_trim(0) df_accum.set_index(['Link_ID', 'tstep'], inplace=True) diff --git a/hermesv3_bu/writer/cmaq_writer.py b/hermesv3_bu/writer/cmaq_writer.py index b74c9d25bb80351a1718a9d8562defc0ac2166c2..0649c89181da5f3e440ffd21b2eefa6681a3c096 100755 --- a/hermesv3_bu/writer/cmaq_writer.py +++ b/hermesv3_bu/writer/cmaq_writer.py @@ -134,15 +134,17 @@ class CmaqWriter(Writer): """ spent_time = timeit.default_timer() - a = np.array([[[]]]) + t_flag = np.empty((len(self.date_array), len(self.pollutant_info), 2)) - for date in self.date_array: - b = np.array([[int(date.strftime('%Y%j'))], [int(date.strftime('%H%M%S'))]] * len(self.pollutant_info)) - a = np.append(a, b) + for i_d, date in enumerate(self.date_array): + y_d = int(date.strftime('%Y%j')) + hms = int(date.strftime('%H%M%S')) + for i_p in range(len(self.pollutant_info)): + t_flag[i_d, i_p, 0] = y_d + t_flag[i_d, i_p, 1] = hms - a.shape = (len(self.date_array), 2, len(self.pollutant_info)) self.logger.write_time_log('CmaqWriter', 'create_tflag', timeit.default_timer() - spent_time) - return a + return t_flag def str_var_list(self): """ @@ -180,7 +182,7 @@ class CmaqWriter(Writer): df = pd.read_csv(global_attributes_path) - for att in atts_dict.iterkeys(): + for att in atts_dict.keys(): try: if att in int_atts: atts_dict[att] = np.int32(df.loc[df['attribute'] == att, 'value'].item()) @@ -282,6 +284,7 @@ class CmaqWriter(Writer): tflag = netcdf.createVariable('TFLAG', 'i', ('TSTEP', 'VAR', 
'DATE-TIME',)) tflag.setncatts({'units': "{:<16}".format(''), 'long_name': "{:<16}".format('TFLAG'), 'var_desc': "{:<80}".format('Timestep-valid flags: (1) YYYYDDD or (2) HHMMSS')}) + tflag[:] = self.create_tflag() # ========== POLLUTANTS ========== diff --git a/hermesv3_bu/writer/wrfchem_writer.py b/hermesv3_bu/writer/wrfchem_writer.py index 693727c136d67ef889a45a804398b27bf693cfa9..4943f99c58a75604255a139e01d2d33b3d671736 100755 --- a/hermesv3_bu/writer/wrfchem_writer.py +++ b/hermesv3_bu/writer/wrfchem_writer.py @@ -228,7 +228,7 @@ class WrfChemWriter(Writer): df = pd.read_csv(global_attributes_path) - for att in atts_dict.iterkeys(): + for att in atts_dict.keys(): try: if att in int_atts: atts_dict[att] = np.int32(df.loc[df['attribute'] == att, 'value'].item()) diff --git a/hermesv3_bu/writer/writer.py b/hermesv3_bu/writer/writer.py index 0da57438bf3bea439c8bf5006abe51acfb07fd3a..06847f5e7f28f4a082fe661ad6849a7f7ca2c31c 100755 --- a/hermesv3_bu/writer/writer.py +++ b/hermesv3_bu/writer/writer.py @@ -129,7 +129,7 @@ def get_distribution(logger, processors, shape): aux_rows -= 1 rows_sum = 0 - for proc in xrange(processors): + for proc in range(processors): total_rows -= aux_rows if total_rows < 0 or proc == processors - 1: rows = total_rows + aux_rows @@ -193,7 +193,7 @@ def get_balanced_distribution(logger, processors, shape): procs_rows_extended = total_rows-(procs_rows*processors) rows_sum = 0 - for proc in xrange(processors): + for proc in range(processors): if proc < procs_rows_extended: aux_rows = procs_rows + 1 else: @@ -308,7 +308,7 @@ class Writer(object): # Sending self.logger.write_log('Sending emissions to the writing processors.', message_level=2) requests = [] - for w_rank, info in self.rank_distribution.iteritems(): + for w_rank, info in self.rank_distribution.items(): partial_emis = emissions.loc[(emissions.index.get_level_values(0) >= info['fid_min']) & (emissions.index.get_level_values(0) < info['fid_max'])] @@ -320,40 +320,29 @@ class Writer(object): # Receiving self.logger.write_log('Receiving emissions in the writing processors.', message_level=2) - if self.comm_world.Get_rank() in self.rank_distribution.iterkeys(): + if self.comm_world.Get_rank() in self.rank_distribution.keys(): self.logger.write_log("I'm a writing processor.", message_level=3) data_list = [] self.logger.write_log("Prepared to receive", message_level=3) - for i_rank in xrange(self.comm_world.Get_size()): - # print self.rank_distribution[i_rank] - # print reduce(lambda x, y: x * y, self.rank_distribution[i_rank]['shape']) - # req = self.comm_world.irecv(source=i_rank, tag=i_rank + MPI_TAG_CONSTANT) - # data_size = req.wait() - + for i_rank in range(self.comm_world.Get_size()): self.logger.write_log( '\tFrom {0} to {1}'.format(i_rank, self.comm_world.Get_rank()), message_level=3) req = self.comm_world.irecv(2**27, source=i_rank, tag=i_rank) dataframe = req.wait() data_list.append(dataframe.reset_index()) - # print "I'm Rank {0} DataList: \n {1}".format(self.comm_world.Get_rank(), data_list) - # new_emissions = pd.concat(data_list).reset_index().groupby(['FID', 'layer', 'tstep']).sum() + new_emissions = pd.concat(data_list) new_emissions[['FID', 'layer', 'tstep']] = new_emissions[['FID', 'layer', 'tstep']].astype(np.int32) - # new_emissions.reset_index(inplace=True) new_emissions = new_emissions.groupby(['FID', 'layer', 'tstep']).sum() - # try: - # new_emissions = new_emissions.groupby(['FID', 'layer', 'tstep']).sum() - # except KeyError as e: - # print "I'm Rank {0} ERROR on: \n 
{1}".format(self.comm_world.Get_rank(), new_emissions) - # raise e + else: new_emissions = None self.comm_world.Barrier() self.logger.write_log('All emissions received.', message_level=2) - if self.emission_summary and self.comm_world.Get_rank() in self.rank_distribution.iterkeys(): + if self.emission_summary and self.comm_world.Get_rank() in self.rank_distribution.keys(): self.make_summary(new_emissions) self.logger.write_time_log('Writer', 'gather_emissions', timeit.default_timer() - spent_time) @@ -396,7 +385,7 @@ class Writer(object): spent_time = timeit.default_timer() emissions = self.unit_change(emissions) emissions = self.gather_emissions(emissions) - if self.comm_world.Get_rank() in self.rank_distribution.iterkeys(): + if self.comm_world.Get_rank() in self.rank_distribution.keys(): self.write_netcdf(emissions) self.comm_world.Barrier() diff --git a/setup.py b/setup.py index 1472ab30ddee0cbb7fa4837b6301d22e58aefb58..4159598584de18c17c3d0330b911ad9069a4b6fc 100755 --- a/setup.py +++ b/setup.py @@ -35,7 +35,6 @@ setup( 'pyproj', 'configargparse', 'cf_units>=1.1.3', - 'holidays', 'pytz', 'timezonefinder', 'mpi4py', @@ -45,7 +44,7 @@ setup( ], packages=find_packages(), classifiers=[ - "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.7", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Topic :: Scientific/Engineering :: Atmospheric Science"
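Note on the TFLAG rewrite in hermesv3_bu/writer/cmaq_writer.py above: CMAQ expects TFLAG as an integer array of shape (TSTEP, VAR, DATE-TIME) that stores the Julian date (YYYYDDD) and time (HHMMSS) of every timestep once per output variable; the removed np.append/reshape construction assembled the same values but left the VAR and DATE-TIME axes swapped. The following is a minimal standalone sketch of the new layout, not part of the commit: date_array and pollutant_info are hypothetical stand-ins for the writer's attributes, and the dtype is pinned to int32 here, whereas the committed code fills a float array and lets netCDF4 cast it on assignment to the 'i'-typed variable.

    from datetime import datetime, timedelta
    import numpy as np

    # Hypothetical stand-ins for CmaqWriter.date_array and CmaqWriter.pollutant_info.
    date_array = [datetime(2016, 11, 29) + timedelta(hours=h) for h in range(25)]
    pollutant_info = ['NO', 'NO2', 'CO']

    # One (YYYYDDD, HHMMSS) pair per timestep, repeated for every output variable.
    t_flag = np.empty((len(date_array), len(pollutant_info), 2), dtype=np.int32)
    for i_d, date in enumerate(date_array):
        t_flag[i_d, :, 0] = int(date.strftime('%Y%j'))   # Julian date
        t_flag[i_d, :, 1] = int(date.strftime('%H%M%S'))  # time of day

    assert t_flag.shape == (25, 3, 2)  # (TSTEP, VAR, DATE-TIME)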