Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding logging for Hydrography scripts #31

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
4 changes: 4 additions & 0 deletions hydrography-approach/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,8 @@ output_files:
bridge_with_proj_points: "output-data/{{ state }}/csv-files/bridge-osm-association-with-projected-points.csv"
bridge_match_percentage: "output-data/{{ state }}/csv-files/Association-match-check-with-percentage.csv"
final_bridges_csv: "output-data/{{ state }}/csv-files/Final-bridges-with-percentage-match.csv"

logging:
log_file_path: "hydrography-pipeline.log"


Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def merge_join_data_with_intersections(all_join_csv, intersections_csv):
return df


def create_intermediate_association(df, intermediate_association):
def create_intermediate_association(df, intermediate_association, logger):
"""
Function to create intermediate association among bridges and ways.
"""
Expand Down Expand Up @@ -200,12 +200,12 @@ def create_intermediate_association(df, intermediate_association):

# Save intermediate results
df.to_csv(intermediate_association)
print(f"\n{intermediate_association} file has been created successfully!")
logger.info(f"{intermediate_association} file has been created successfully!")

return df


def create_final_associations(df, association_with_intersections):
def create_final_associations(df, association_with_intersections, logger):
"""
Function to create final association among bridges and ways.
"""
Expand Down Expand Up @@ -233,12 +233,12 @@ def create_final_associations(df, association_with_intersections):
association_with_intersections,
index=False,
)
print(f"\n{association_with_intersections} file has been created successfully!")
logger.info(f"{association_with_intersections} file has been created successfully!")

return df


def add_bridge_details(df, nbi_bridge_data, bridge_association_lengths):
def add_bridge_details(df, nbi_bridge_data, bridge_association_lengths, logger):
"""
Function to add bridge information to associated data.
"""
Expand Down Expand Up @@ -289,8 +289,8 @@ def add_bridge_details(df, nbi_bridge_data, bridge_association_lengths):
bridge_association_lengths,
index=False,
)
print(
f"\n{bridge_association_lengths} file has been created successfully!"
logger.info(
f"{bridge_association_lengths} file has been created successfully!"
)


Expand All @@ -300,11 +300,12 @@ def process_final_id(
intermediate_association,
association_with_intersections,
nbi_bridge_data,
bridge_association_lengths
bridge_association_lengths,
logger
):
df = merge_join_data_with_intersections(all_join_csv, intersections_csv)
intermediate_df = create_intermediate_association(df, intermediate_association)
intermediate_df = create_intermediate_association(df, intermediate_association, logger)
final_df = create_final_associations(
intermediate_df, association_with_intersections
intermediate_df, association_with_intersections, logger
)
add_bridge_details(final_df, nbi_bridge_data, bridge_association_lengths)
add_bridge_details(final_df, nbi_bridge_data, bridge_association_lengths, logger)
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def load_nearby_join(csv_file):
return pd.read_csv(csv_file)


def filter_duplicates_and_output(bridge_df, join_df, output_csv):
def filter_duplicates_and_output(bridge_df, join_df, output_csv, logger):
"""Filter duplicates based on osm_similarity score and output filtered bridge info."""

filtered_df = join_df[
Expand All @@ -38,7 +38,7 @@ def filter_duplicates_and_output(bridge_df, join_df, output_csv):

except IndexError:
# Handle the case where ID is not found in bridge_df
print(f"id {sn1} or {sn2} not found in bridge_df")
logger.info(f"id {sn1} or {sn2} not found in bridge_df")
continue

# Determine which ID to retain based on osm_similarity score
Expand All @@ -51,20 +51,19 @@ def filter_duplicates_and_output(bridge_df, join_df, output_csv):
else:
continue

# Print set of IDs that are retained
print("IDs to be removed:", remove_ids)
# logger.info("IDs to be removed: %s", remove_ids)

# Drop rows of bridge_df whose structure number is in remove_ids and output the rest to a new CSV
filtered_bridge_df = bridge_df[~bridge_df["8 - Structure Number"].isin(remove_ids)]
filtered_bridge_df.to_csv(output_csv, index=False)

print(f"Filtered bridge information saved to '{output_csv}'.")
logger.info(f"Filtered bridge information saved to '{output_csv}'.")


def run(bridge_match_percentage, nearby_join_csv, final_bridges_csv):
def run(bridge_match_percentage, nearby_join_csv, final_bridges_csv, logger):
# Load data
bridge_df = load_bridge_info(bridge_match_percentage)
join_df = load_nearby_join(nearby_join_csv)

# Filter duplicates based on osm_similarity score and output filtered bridge info
filter_duplicates_and_output(bridge_df, join_df, final_bridges_csv)
filter_duplicates_and_output(bridge_df, join_df, final_bridges_csv, logger)
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd


def process_all_join(nbi_30_join_csv, nbi_10_join_csv, all_join_dask, all_join_csv):
def process_all_join(nbi_30_join_csv, nbi_10_join_csv, all_join_dask, all_join_csv, logger):
left_csv = nbi_30_join_csv
right_csv = nbi_10_join_csv

Expand Down Expand Up @@ -61,7 +61,7 @@ def process_all_join(nbi_30_join_csv, nbi_10_join_csv, all_join_dask, all_join_c
all_join_csv,
index=False,
)
print(f"Output file: {all_join_csv} has been created successfully!")
logger.info(f"Output file: {all_join_csv} has been created successfully!")

# Optional: Clean up the part files
shutil.rmtree(all_join_dask)
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def convert_to_geopackage(input_file, output_file):
subprocess.run(cmd, check=True)


def filter_ways(input_osm_pbf, output_osm_pbf, output_gpkg):
def filter_ways(input_osm_pbf, output_osm_pbf, output_gpkg, logger):
"""
Perform filter operation.
"""
Expand Down Expand Up @@ -50,4 +50,4 @@ def filter_ways(input_osm_pbf, output_osm_pbf, output_gpkg):
# Convert the filtered OSM PBF file to a GeoPackage
convert_to_geopackage(output_osm_pbf, output_gpkg)

print(f"Output file: {output_gpkg} has been created successfully!")
logger.info(f"Output file: {output_gpkg} has been created successfully!")
varun-andhra-mapup marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def exclude_duplicate_bridges(df, output_duplicate_exclude_csv):
return df


def convert_to_gpkg(df, output_gpkg_file):
def convert_to_gpkg(df, output_gpkg_file, logger):
varun-andhra-mapup marked this conversation as resolved.
Show resolved Hide resolved
"""
Function to convert the DataFrame to a GeoPackage
"""
Expand All @@ -45,10 +45,10 @@ def convert_to_gpkg(df, output_gpkg_file):

gdf.to_file(output_gpkg_file, driver="GPKG")

print(f"GeoPackage saved successfully to {output_gpkg_file}")
logger.info(f"GeoPackage saved successfully to {output_gpkg_file}")


def create_nbi_geopackage(input_csv, output_duplicate_exclude_csv, output_gpkg_file):
def create_nbi_geopackage(input_csv, output_duplicate_exclude_csv, output_gpkg_file, logger):
"""
Function to perform processing of coordinates and filtering of bridges
"""
Expand All @@ -58,4 +58,4 @@ def create_nbi_geopackage(input_csv, output_duplicate_exclude_csv, output_gpkg_f
df = exclude_duplicate_bridges(df, output_duplicate_exclude_csv)

# Convert the final DataFrame to a GeoPackage file
convert_to_gpkg(df, output_gpkg_file)
convert_to_gpkg(df, output_gpkg_file, logger)
Original file line number Diff line number Diff line change
Expand Up @@ -199,18 +199,18 @@ def get_line_intersections(filtered_osm_gl, rivers_gl):
return intersections


def load_layers(nbi_points_fp, osm_fp):
def load_layers(nbi_points_fp, osm_fp, logger):
"""
Load required layers
"""
nbi_points_gl = QgsVectorLayer(nbi_points_fp, "nbi-points", "ogr")
if not nbi_points_gl.isValid():
print("NBI points layer failed to load!")
logger.error("NBI points layer failed to load!")
sys.exit(1)

osm_gl = QgsVectorLayer(osm_fp, "filtered", "ogr")
if not osm_gl.isValid():
print("OSM ways layer failed to load!")
logger.error("OSM ways layer failed to load!")
sys.exit(1)

return nbi_points_gl, osm_gl
Expand Down Expand Up @@ -242,7 +242,7 @@ def load_layers(nbi_points_fp, osm_fp):


def process_bridge(
nbi_points_gl, exploded_osm_gl, bridge_yes_join_csv, yes_filter_bridges
nbi_points_gl, exploded_osm_gl, bridge_yes_join_csv, yes_filter_bridges, logger
):
"""
Process bridges: filter and join NBI data with OSM data
Expand Down Expand Up @@ -272,7 +272,7 @@ def process_bridge(
filtered_layer, yes_filter_bridges, "utf-8", filtered_layer.crs(), "GPKG"
)

print(f"\nOutput file: {yes_filter_bridges} has been created successfully!")
logger.info(f"Output file: {yes_filter_bridges} has been created successfully!")

QgsProject.instance().removeMapLayer(filtered_osm_gl.id())
QgsProject.instance().removeMapLayer(buffer_80.id())
Expand All @@ -282,7 +282,7 @@ def process_bridge(


def process_layer_tag(
nbi_points_gl, exploded_osm_gl, manmade_join_csv, manmade_filter_bridges
nbi_points_gl, exploded_osm_gl, manmade_join_csv, manmade_filter_bridges, logger
):
"""
Process layer tags: filter and join NBI data with OSM data based on layer tag
Expand Down Expand Up @@ -312,7 +312,7 @@ def process_layer_tag(
filtered_layer, manmade_filter_bridges, "utf-8", filtered_layer.crs(), "GPKG"
)

print(f"\nOutput file: {manmade_filter_bridges} has been created successfully!")
logger.info(f"Output file: {manmade_filter_bridges} has been created successfully!")

QgsProject.instance().removeMapLayer(filtered_osm_gl.id())
QgsProject.instance().removeMapLayer(buffer_30.id())
Expand All @@ -322,7 +322,7 @@ def process_layer_tag(


def process_parallel_bridges(
nbi_points_gl, exploded_osm_gl, parallel_join_csv, parallel_filter_bridges
nbi_points_gl, exploded_osm_gl, parallel_join_csv, parallel_filter_bridges, logger
):
"""
Process parallel bridges: identify and filter parallel bridges
Expand Down Expand Up @@ -361,7 +361,9 @@ def process_parallel_bridges(
filtered_layer, parallel_filter_bridges, "utf-8", filtered_layer.crs(), "GPKG"
)

print(f"\nOutput file: {parallel_filter_bridges} has been created successfully!")
logger.info(
f"Output file: {parallel_filter_bridges} has been created successfully!"
)

QgsProject.instance().removeMapLayer(filtered_osm_gl.id())
QgsProject.instance().removeMapLayer(buffer_30.id())
Expand Down Expand Up @@ -400,6 +402,7 @@ def process_culverts_from_pbf(
state_name,
culvert_join_csv,
final_bridges,
logger,
):
"""
Process and filter out tunnels marked as culverts from a local OSM PBF file.
Expand Down Expand Up @@ -485,7 +488,7 @@ def process_culverts_from_pbf(
filtered_layer, final_bridges, "utf-8", filtered_layer.crs(), "GPKG"
)

print(f"\nOutput file: {final_bridges} has been created successfully!")
logger.info(f"Output file: {final_bridges} has been created successfully!")

# Remove temporary layers from the project
QgsProject.instance().removeMapLayer(osm_layer.id())
Expand All @@ -505,16 +508,18 @@ def process_buffer_join(
osm_nhd_join_csv,
nbi_10_join_csv,
nbi_30_join_csv,
logger,
):
"""
Process buffer join: join NBI data with OSM and river data
"""
base_filename = os.path.splitext(os.path.basename(rivers_data))[0]
rivers_fp = rivers_data + f"|layername=NHD-{state_name}-Flowline"
# rivers_fp = rivers_data + "|layername=NHDFlowline"

rivers_gl = QgsVectorLayer(rivers_fp, "rivers", "ogr")
if not rivers_gl.isValid():
print("Rivers layer failed to load!")
logger.error("Rivers layer failed to load!")
sys.exit(1)

filter_expression = "highway not in ('abandoned','bridleway','construction','corridor','crossing','cycleway','elevator','escape','footway','living_street','path','pedestrian','planned','proposed','raceway','rest_area','steps') AND bridge IS NULL AND layer IS NULL"
Expand All @@ -526,7 +531,7 @@ def process_buffer_join(
intersections,
intersections_csv,
)
print(f"\nOutput file: {intersections_csv} has been created successfully!")
logger.info(f"Output file: {intersections_csv} has been created successfully!")

osm_river_join = join_by_location(
osm_gl,
Expand All @@ -545,7 +550,7 @@ def process_buffer_join(
osm_river_join,
osm_nhd_join_csv,
)
print(f"\nOutput file: {osm_nhd_join_csv} has been created successfully!")
logger.info(f"Output file: {osm_nhd_join_csv} has been created successfully!")

buffer_10 = create_buffer(nbi_points_gl, 0.0001)
buffer_30 = create_buffer(nbi_points_gl, 0.0003)
Expand All @@ -568,13 +573,12 @@ def process_buffer_join(
"permanent_identifier",
]


vl_to_csv_filter(
nbi_10_river_join,
nbi_10_join_csv,
keep_fields,
)
print(f"\nOutput file: {nbi_10_join_csv} has been created successfully!")
logger.info(f"Output file: {nbi_10_join_csv} has been created successfully!")

nbi_30_osm_river_join = join_by_location(
buffer_30, osm_river_join, [], geometric_predicates=[0]
Expand All @@ -597,7 +601,7 @@ def process_buffer_join(
nbi_30_join_csv,
keep_fields,
)
print(f"\nOutput file: {nbi_30_join_csv} has been created successfully!")
logger.info(f"Output file: {nbi_30_join_csv} has been created successfully!")


def process_tagging(
Expand All @@ -612,14 +616,15 @@ def process_tagging(
parallel_filter_bridges,
nearby_join_csv,
state_folder,
state_name,
culvert_join_csv,
final_bridges,
rivers_data,
intersections_csv,
osm_nhd_join_csv,
nbi_10_join_csv,
nbi_30_join_csv,
logger,
state_name,
):
# Get QGIS pathname for NBI points vector layer
base_filename = os.path.splitext(os.path.basename(nbi_geopackage))[0]
Expand All @@ -628,17 +633,21 @@ def process_tagging(
osm_fp = filtered_highways + "|layername=lines"
osm_pbf_path = state_latest_osm

nbi_points_gl, osm_gl = load_layers(nbi_points_fp, osm_fp)
nbi_points_gl, osm_gl = load_layers(nbi_points_fp, osm_fp, logger)
exploded_osm_gl = explode_osm_data(osm_gl)

output_layer1 = process_bridge(
nbi_points_gl, exploded_osm_gl, bridge_yes_join_csv, yes_filter_bridges
nbi_points_gl, exploded_osm_gl, bridge_yes_join_csv, yes_filter_bridges, logger
)
output_layer2 = process_layer_tag(
output_layer1, exploded_osm_gl, manmade_join_csv, manmade_filter_bridges
output_layer1, exploded_osm_gl, manmade_join_csv, manmade_filter_bridges, logger
)
output_layer3 = process_parallel_bridges(
output_layer2, exploded_osm_gl, parallel_join_csv, parallel_filter_bridges
output_layer2,
exploded_osm_gl,
parallel_join_csv,
parallel_filter_bridges,
logger,
)
process_nearby_bridges(output_layer3, nearby_join_csv)
output_layer4 = process_culverts_from_pbf(
Expand All @@ -648,6 +657,7 @@ def process_tagging(
state_name,
culvert_join_csv,
final_bridges,
logger,
)
process_buffer_join(
output_layer4,
Expand All @@ -659,4 +669,5 @@ def process_tagging(
osm_nhd_join_csv,
nbi_10_join_csv,
nbi_30_join_csv,
logger,
)
Loading