Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Import performance #3626

Merged
merged 3 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 10 additions & 25 deletions lib-sql/functions/partition-functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,6 @@ CREATE TYPE nearfeaturecentr AS (
centroid GEOMETRY
);

-- feature intersects geometry
-- for areas and linestrings they must touch at least along a line
CREATE OR REPLACE FUNCTION is_relevant_geometry(de9im TEXT, geom_type TEXT)
RETURNS BOOLEAN
AS $$
BEGIN
IF substring(de9im from 1 for 2) != 'FF' THEN
RETURN TRUE;
END IF;

IF geom_type = 'ST_Point' THEN
RETURN substring(de9im from 4 for 1) = '0';
END IF;

IF geom_type in ('ST_LineString', 'ST_MultiLineString') THEN
RETURN substring(de9im from 4 for 1) = '1';
END IF;

RETURN substring(de9im from 4 for 1) = '2';
END
$$ LANGUAGE plpgsql IMMUTABLE;

CREATE OR REPLACE function getNearFeatures(in_partition INTEGER, feature GEOMETRY,
feature_centroid GEOMETRY,
maxrank INTEGER)
Expand All @@ -59,7 +37,12 @@ BEGIN
isguess, postcode, centroid
FROM location_area_large_{{ partition }}
WHERE geometry && feature
AND is_relevant_geometry(ST_Relate(geometry, feature), ST_GeometryType(feature))
AND CASE WHEN ST_Dimension(feature) = 0
THEN _ST_Covers(geometry, feature)
WHEN ST_Dimension(feature) = 2
THEN ST_Relate(geometry, feature, 'T********')
ELSE ST_NPoints(ST_Intersection(geometry, feature)) > 1
END
AND rank_address < maxrank
-- Postcodes currently still use rank_search to define for which
-- features they are relevant.
Expand Down Expand Up @@ -142,14 +125,16 @@ BEGIN

IF in_rank_search <= 4 and not in_estimate THEN
INSERT INTO location_area_country (place_id, country_code, geometry)
values (in_place_id, in_country_code, in_geometry);
(SELECT in_place_id, in_country_code, geom
FROM split_geometry(in_geometry) as geom);
RETURN TRUE;
END IF;

{% for partition in db.partitions %}
IF in_partition = {{ partition }} THEN
INSERT INTO location_area_large_{{ partition }} (partition, place_id, country_code, keywords, rank_search, rank_address, isguess, postcode, centroid, geometry)
values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, postcode, in_centroid, in_geometry);
(SELECT in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, postcode, in_centroid, geom
FROM split_geometry(in_geometry) as geom);
RETURN TRUE;
END IF;
{% endfor %}
Expand Down
69 changes: 39 additions & 30 deletions lib-sql/functions/utils.sql
Original file line number Diff line number Diff line change
Expand Up @@ -348,8 +348,6 @@ CREATE OR REPLACE FUNCTION add_location(place_id BIGINT, country_code varchar(2)
RETURNS BOOLEAN
AS $$
DECLARE
locationid INTEGER;
secgeo GEOMETRY;
postcode TEXT;
BEGIN
PERFORM deleteLocationArea(partition, place_id, rank_search);
Expand All @@ -360,18 +358,19 @@ BEGIN
postcode := upper(trim (in_postcode));
END IF;

IF ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') THEN
FOR secgeo IN select split_geometry(geometry) AS geom LOOP
PERFORM insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, false, postcode, centroid, secgeo);
END LOOP;

ELSEIF ST_GeometryType(geometry) = 'ST_Point' THEN
secgeo := place_node_fuzzy_area(geometry, rank_search);
PERFORM insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, true, postcode, centroid, secgeo);
IF ST_Dimension(geometry) = 2 THEN
RETURN insertLocationAreaLarge(partition, place_id, country_code, keywords,
rank_search, rank_address, false, postcode,
centroid, geometry);
END IF;

IF ST_Dimension(geometry) = 0 THEN
RETURN insertLocationAreaLarge(partition, place_id, country_code, keywords,
rank_search, rank_address, true, postcode,
centroid, place_node_fuzzy_area(geometry, rank_search));
END IF;

RETURN true;
RETURN false;
END;
$$
LANGUAGE plpgsql;
Expand All @@ -394,19 +393,21 @@ DECLARE
geo RECORD;
area FLOAT;
remainingdepth INTEGER;
added INTEGER;
BEGIN

-- RAISE WARNING 'quad_split_geometry: maxarea=%, depth=%',maxarea,maxdepth;

IF (ST_GeometryType(geometry) not in ('ST_Polygon','ST_MultiPolygon') OR NOT ST_IsValid(geometry)) THEN
IF not ST_IsValid(geometry) THEN
RETURN;
END IF;

IF ST_Dimension(geometry) != 2 OR maxdepth <= 1 THEN
RETURN NEXT geometry;
RETURN;
END IF;

remainingdepth := maxdepth - 1;
area := ST_AREA(geometry);
IF remainingdepth < 1 OR area < maxarea THEN
IF area < maxarea THEN
RETURN NEXT geometry;
RETURN;
END IF;
Expand All @@ -426,7 +427,6 @@ BEGIN
xmid := (xmin+xmax)/2;
ymid := (ymin+ymax)/2;

added := 0;
FOR seg IN 1..4 LOOP

IF seg = 1 THEN
Expand All @@ -442,16 +442,13 @@ BEGIN
secbox := ST_SetSRID(ST_MakeBox2D(ST_Point(xmid,ymid),ST_Point(xmax,ymax)),4326);
END IF;

IF st_intersects(geometry, secbox) THEN
secgeo := st_intersection(geometry, secbox);
IF NOT ST_IsEmpty(secgeo) AND ST_GeometryType(secgeo) in ('ST_Polygon','ST_MultiPolygon') THEN
FOR geo IN select quad_split_geometry(secgeo, maxarea, remainingdepth) as geom LOOP
IF NOT ST_IsEmpty(geo.geom) AND ST_GeometryType(geo.geom) in ('ST_Polygon','ST_MultiPolygon') THEN
added := added + 1;
RETURN NEXT geo.geom;
END IF;
END LOOP;
END IF;
secgeo := st_intersection(geometry, secbox);
IF NOT ST_IsEmpty(secgeo) AND ST_Dimension(secgeo) = 2 THEN
FOR geo IN SELECT quad_split_geometry(secgeo, maxarea, remainingdepth) as geom LOOP
IF NOT ST_IsEmpty(geo.geom) AND ST_Dimension(geo.geom) = 2 THEN
RETURN NEXT geo.geom;
END IF;
END LOOP;
END IF;
END LOOP;

Expand All @@ -467,10 +464,22 @@ CREATE OR REPLACE FUNCTION split_geometry(geometry GEOMETRY)
DECLARE
geo RECORD;
BEGIN
-- 10000000000 is ~~ 1x1 degree
FOR geo IN select quad_split_geometry(geometry, 0.25, 20) as geom LOOP
RETURN NEXT geo.geom;
END LOOP;
IF ST_GeometryType(geometry) = 'ST_MultiPolygon'
and ST_Area(geometry) * 10 > ST_Area(Box2D(geometry))
THEN
FOR geo IN
SELECT quad_split_geometry(g, 0.25, 20) as geom
FROM (SELECT (ST_Dump(geometry)).geom::geometry(Polygon, 4326) AS g) xx
LOOP
RETURN NEXT geo.geom;
END LOOP;
ELSE
FOR geo IN
SELECT quad_split_geometry(geometry, 0.25, 20) as geom
LOOP
RETURN NEXT geo.geom;
END LOOP;
END IF;
RETURN;
END;
$$
Expand Down
Loading