From db52d6bc7223a5a80f7b57e6ca45b1f9d4351378 Mon Sep 17 00:00:00 2001 From: Pushpak Gote Date: Wed, 21 Aug 2024 16:22:21 +0530 Subject: [PATCH 1/3] Fixed duplicate neighbouring names and ids --- mile-point-approach/get_neighbouring_roads.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mile-point-approach/get_neighbouring_roads.py b/mile-point-approach/get_neighbouring_roads.py index bcc23ff..544b0ac 100644 --- a/mile-point-approach/get_neighbouring_roads.py +++ b/mile-point-approach/get_neighbouring_roads.py @@ -207,7 +207,7 @@ def group_and_aggregate(df: pd.DataFrame) -> gpd.GeoDataFrame: Group by geometry and aggregate specified columns. Args: - df (pd.DataFrame): Input DataFrame. + df (pd.DataFrame): Input DataFrame. Returns: gpd.GeoDataFrame: Grouped and aggregated GeoDataFrame. @@ -216,10 +216,14 @@ def group_and_aggregate(df: pd.DataFrame) -> gpd.GeoDataFrame: GroupingError: If there's an error during grouping and aggregation. """ try: - grouped = df.groupby(['geometry', 'created_unique_id_1_left', 'bridge_id_left']).agg({ + + #Spatial join sometimes gives same rows within a buffer, so drop duplicates + drop_duplicates_cols=['geometry', 'created_unique_id_1_left', 'bridge_id_left','created_unique_id_1_right','RD_NAME_right'] + grouped = df[drop_duplicates_cols].drop_duplicates().groupby(['geometry', 'created_unique_id_1_left', 'bridge_id_left']).agg({ 'created_unique_id_1_right': lambda x: ', '.join(x.astype(str)), 'RD_NAME_right': lambda x: ', '.join(x.astype(str)), }).reset_index() + return gpd.GeoDataFrame(grouped, geometry='geometry', crs=df.crs) except KeyError as e: logger.error(f"Grouping error: missing column {str(e)}") @@ -284,6 +288,9 @@ def load_and_transform_data() -> Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: osm_road_points = read_geopackage(FilePath.OSM_ROAD_POINTS.value) state_road = read_geopackage(FilePath.STATE_ROAD.value) + #Change 'NAME' to column name that contains road names from state_road dataset + # 
state_road.rename(columns={'NAME':'RD_NAME'}, inplace=True) + logger.info(f"OSM Road Points CRS: {osm_road_points.crs}") logger.info(f"State Road CRS: {state_road.crs}") From 06ab1dd507ff013a6ffed8d84e9ceeca8433f04e Mon Sep 17 00:00:00 2001 From: Pushpak Gote Date: Wed, 21 Aug 2024 16:31:52 +0530 Subject: [PATCH 2/3] Uncommented line --- mile-point-approach/get_neighbouring_roads.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mile-point-approach/get_neighbouring_roads.py b/mile-point-approach/get_neighbouring_roads.py index 544b0ac..3043793 100644 --- a/mile-point-approach/get_neighbouring_roads.py +++ b/mile-point-approach/get_neighbouring_roads.py @@ -289,7 +289,7 @@ def load_and_transform_data() -> Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: state_road = read_geopackage(FilePath.STATE_ROAD.value) #Change 'NAME' to column name that contains road names from state_road dataset - # state_road.rename(columns={'NAME':'RD_NAME'}, inplace=True) + state_road.rename(columns={'NAME':'RD_NAME'}, inplace=True) logger.info(f"OSM Road Points CRS: {osm_road_points.crs}") logger.info(f"State Road CRS: {state_road.crs}") From cdbdeb64ae5639cfdb8f5a26c311a12e1c5afaa8 Mon Sep 17 00:00:00 2001 From: Pushpak Gote Date: Wed, 21 Aug 2024 16:40:03 +0530 Subject: [PATCH 3/3] Changed comment --- mile-point-approach/get_neighbouring_roads.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mile-point-approach/get_neighbouring_roads.py b/mile-point-approach/get_neighbouring_roads.py index 3043793..3300293 100644 --- a/mile-point-approach/get_neighbouring_roads.py +++ b/mile-point-approach/get_neighbouring_roads.py @@ -288,8 +288,8 @@ def load_and_transform_data() -> Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: osm_road_points = read_geopackage(FilePath.OSM_ROAD_POINTS.value) state_road = read_geopackage(FilePath.STATE_ROAD.value) - #Change 'NAME' to column name that contains road names from state_road dataset - 
state_road.rename(columns={'NAME':'RD_NAME'}, inplace=True) + #Change 'NAME' to column name that contains road names from state_road dataset, if it's already RD_NAME then no need to change + # state_road.rename(columns={'NAME':'RD_NAME'}, inplace=True) logger.info(f"OSM Road Points CRS: {osm_road_points.crs}") logger.info(f"State Road CRS: {state_road.crs}")