Skip to content

Commit

Permalink
Merge branch 'main' of github.com:robsavoye/conflator
Browse files Browse the repository at this point in the history
Sync with upstream
  • Loading branch information
robsavoye committed Sep 23, 2023
2 parents da94b09 + ba04c94 commit 7e6bd1e
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 54 deletions.
3 changes: 3 additions & 0 deletions hotstuff.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ def usage(self):


def writeLayer(file=None, layer=None):
"""Write an OGR Layer to disk"""
if file is None:
logging.error("Supply a filespec!")
return
Expand Down Expand Up @@ -157,6 +158,7 @@ def writeLayer(file=None, layer=None):
outfile.Destroy()

def makeBoundary(data=None):
"""Make a bounding box for some data"""
# Create a bounding box, since we want a rectangular area to extract
# that fits a monitor window. Also, many boundaries come as lines,
# so close the polygon.
Expand Down Expand Up @@ -289,6 +291,7 @@ def getProjectBoundary(options=None):
return data

def makeFeature(id, fields, geom):
"""Make an OGR Feature"""
feature = ogr.Feature(fields)
feature.SetField("id", id)
feature.SetField("building", "yes")
Expand Down
154 changes: 100 additions & 54 deletions poidup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/python3

# Copyright (c) 2022 Humanitarian OpenStreetMap Team
# Copyright (c) 2022, 2023 Humanitarian OpenStreetMap Team
#
# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -28,7 +28,7 @@
# designed for larger datasets, so if that becomes necessary.

import logging
import getopt
import argparse
from sys import argv
import os
import epdb
Expand All @@ -41,61 +41,107 @@
import psycopg2
from shapely.geometry import shape

# Instantiate logger
log = logging.getLogger(__name__)

# The tags we care about for this conflation
tags = ('amenity', 'leisure', 'information', 'tourism', 'sport')

indata = 'CWP_Facilities.geojson'
infile = open(indata, 'r')

outdata = 'cwp.geojson'
outfile = open(outdata, 'w')
outfeatures = list()

db = 'colorado'
connect = f"host=localhost dbname={db}"
dbshell = psycopg2.connect(connect)
dbcursor = dbshell.cursor()

# The tolerance in meters for nearby features
tolerance = "2"
data = geojson.load(infile)
spin = Bar('Processing...', max=len(data['features']))

print("Data file contains %d features" % len(data['features']))
for feature in data['features']:
hits = False
spin.next()
tags = feature['properties']
for tag in feature['properties']:
if tag in tags:
for key,value in feature['properties'].items():
# print("%s = %s" % (key, value))
geom = feature['geometry']
wkt = shape(geom)
# Use a Geography data type to get the answer in meters, which
# is easier to deal with than degrees of the earth.
# query = "SELECT osm_id,geom,tags,ST_Distance(geom::geography, ST_GeogFromText(\'SRID=4326;%s\')) AS diff FROM nodes WHERE ST_Distance(geom::geography, ST_GeogFromText(\'SRID=4326;%s\')) < %s AND tags->>'%s'='%s'" % (wkt.wkt, wkt.wkt, tolerance, key, value.replace("\'", "&apos;"))
query = "SELECT osm_id,geom,tags FROM nodes WHERE ST_Distance(geom::geography, ST_GeogFromText(\'SRID=4326;%s\')) < %s AND tags->>'%s'='%s'" % (wkt.wkt, tolerance, key, value.replace("\'", "&apos;"))
#print(query)
dbcursor.execute(query)
all = dbcursor.fetchall()
if len(all) > 0:
hits = True
if tag == 'amenity':
# Sometimes the duplicate is a polygon, really common for parking lots.
query = "SELECT osm_id,geom,tags FROM ways_poly WHERE ST_Distance(geom::geography, ST_GeogFromText(\'SRID=4326;%s\')) < %s AND tags->>'%s'='%s'" % (wkt.wkt, tolerance, key, value.replace("\'", "&apos;"))
def main():
    """Flag input features that probably duplicate existing OSM data.

    Reads a GeoJSON file, and for every feature carrying at least one of
    the tags of interest, queries the ``nodes`` table (and, for features
    with an ``amenity`` tag, the ``ways_poly`` table) for an existing
    feature within the tolerance distance that shares one of the input
    feature's key/value pairs. Features with a match get a ``fixme``
    property so they are easy to find in JOSM. The features are then
    written to the output GeoJSON file.

    Exits early after printing the help text when no arguments are given,
    and with a non-zero status when no input file is specified.
    """
    # Imported locally: the verbose handler needs sys.stdout, but the only
    # import from sys visible at the top of this file is "from sys import argv".
    import sys

    parser = argparse.ArgumentParser(
        description="Conflate collected data with existing data"
    )
    parser.add_argument("-v", "--verbose", nargs="?", const="0", help="verbose output")
    parser.add_argument("-dn", "--dbname", default="colorado", help="Database name")
    parser.add_argument("-dh", "--dbhost", default="localhost", help="Database host")
    parser.add_argument("-i", "--infile", help="Input file")
    parser.add_argument("-o", "--outfile", default="poi-out.geojson", help="Output file")

    args = parser.parse_args()

    if len(sys.argv) <= 1:
        parser.print_help()
        sys.exit()

    # if verbose, dump to the terminal.
    if args.verbose is not None:
        root = logging.getLogger()
        log.setLevel(logging.DEBUG)

        ch = logging.StreamHandler(sys.stdout)
        ch.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )
        ch.setFormatter(formatter)
        root.addHandler(ch)

    if args.infile is None:
        logging.error("You must specify the input file to conflate!")
        parser.print_help()
        sys.exit(1)

    # The tags we care about for this conflation. This must not share a
    # name with the per-feature properties, otherwise the membership test
    # below is always true.
    wanted_tags = ('amenity', 'leisure', 'information', 'tourism', 'sport')

    # The tolerance in meters for nearby features
    tolerance = 2.0

    # The key and value come from the input file, so they are passed as
    # query parameters and never interpolated into the SQL text.
    # Use a Geography data type to get the answer in meters, which
    # is easier to deal with than degrees of the earth.
    node_query = (
        "SELECT osm_id,geom,tags FROM nodes"
        " WHERE ST_Distance(geom::geography, ST_GeogFromText(%s)::geography) < %s"
        " AND tags->>%s = %s"
    )
    # Sometimes the duplicate is a polygon, really common for parking lots.
    way_query = (
        "SELECT osm_id,geom,tags FROM ways_poly"
        " WHERE ST_Distance(geom::geography, ST_GeogFromText(%s)::geography) < %s"
        " AND tags->>%s = %s AND tags->>'amenity' IS NOT NULL"
    )

    with open(args.infile, 'r') as infile:
        data = geojson.load(infile)
    features = data['features']

    connect = f"dbname={args.dbname}"
    if args.dbhost is not None:
        connect += f" host={args.dbhost}"
    dbshell = psycopg2.connect(connect)
    dbcursor = dbshell.cursor()

    outfeatures = list()
    spin = Bar('Processing...', max=len(features))
    print("Data file contains %d features" % len(features))
    try:
        # If there is feature in OSM that matches any of the tags, and
        # is very close, flag it as a possible duplicate so we can find
        # these in JOSM.
        for feature in features:
            spin.next()
            props = feature.get('properties') or {}
            geom = feature.get('geometry')
            hits = False
            if geom and any(tag in wanted_tags for tag in props):
                ewkt = f"SRID=4326;{shape(geom).wkt}"
                # NOTE(review): the original nesting was not recoverable
                # from the source; this assumes the polygon lookup runs
                # for every key/value of a feature that has an amenity
                # tag -- confirm.
                check_ways = 'amenity' in props
                for key, value in props.items():
                    if value is None:
                        continue
                    params = (ewkt, tolerance, key, str(value))
                    dbcursor.execute(node_query, params)
                    hits = dbcursor.fetchone() is not None
                    if not hits and check_ways:
                        dbcursor.execute(way_query, params)
                        hits = dbcursor.fetchone() is not None
                    # We only need one good hit to identify a duplicate
                    if hits:
                        break
            if hits:
                feature['properties']['fixme'] = "Probably a duplicate!"
            # NOTE(review): assumes every feature is written and only the
            # probable duplicates are flagged -- confirm.
            outfeatures.append(feature)
    finally:
        dbcursor.close()
        dbshell.close()

    print("Output file contains %d features" % len(outfeatures))
    # Open the output only once there is something to write, so a failed
    # run does not leave a truncated file behind.
    with open(args.outfile, 'w') as outfile:
        geojson.dump(FeatureCollection(outfeatures), outfile)


if __name__ == "__main__":
    main()

0 comments on commit 7e6bd1e

Please sign in to comment.