From f3c4cdd6efb834639f7eca176cc20b22af1a9514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20Arroyo=20Torrens?= Date: Wed, 11 Oct 2023 17:17:24 +0200 Subject: [PATCH] feat(bq,sf|h3,quadbin): add H3/QUADBIN_POLYFILL_TABLE (#447) --- .../modules/doc/h3/H3_POLYFILL_MODE.md | 2 +- .../modules/doc/h3/H3_POLYFILL_TABLE.md | 58 ++++++ .../doc/quadbin/QUADBIN_POLYFILL_TABLE.md | 58 ++++++ .../modules/sql/h3/H3_POLYFILL_TABLE.sql | 59 ++++++ .../sql/quadbin/QUADBIN_POLYFILL_TABLE.sql | 59 ++++++ .../modules/sql/utils/__CHECK_TABLE.sql | 33 ++++ .../modules/sql/utils/__TABLENAME_JOIN.sql | 14 ++ .../modules/sql/utils/__TABLENAME_SPLIT.sql | 16 ++ .../modules/test/h3/H3_POLYFILL_TABLE.test.js | 20 ++ .../quadbin/QUADBIN_POLYFILL_TABLE.test.js | 20 ++ .../modules/doc/h3/H3_POLYFILL_TABLE.md | 58 ++++++ .../modules/sql/h3/H3_POLYFILL_TABLE.sql | 182 ++++++++++++++++++ .../modules/test/h3/H3_POLYFILL_TABLE.test.js | 26 +++ 13 files changed, 604 insertions(+), 1 deletion(-) create mode 100644 clouds/bigquery/modules/doc/h3/H3_POLYFILL_TABLE.md create mode 100644 clouds/bigquery/modules/doc/quadbin/QUADBIN_POLYFILL_TABLE.md create mode 100644 clouds/bigquery/modules/sql/h3/H3_POLYFILL_TABLE.sql create mode 100644 clouds/bigquery/modules/sql/quadbin/QUADBIN_POLYFILL_TABLE.sql create mode 100644 clouds/bigquery/modules/sql/utils/__CHECK_TABLE.sql create mode 100644 clouds/bigquery/modules/sql/utils/__TABLENAME_JOIN.sql create mode 100644 clouds/bigquery/modules/sql/utils/__TABLENAME_SPLIT.sql create mode 100644 clouds/bigquery/modules/test/h3/H3_POLYFILL_TABLE.test.js create mode 100644 clouds/bigquery/modules/test/quadbin/QUADBIN_POLYFILL_TABLE.test.js create mode 100644 clouds/snowflake/modules/doc/h3/H3_POLYFILL_TABLE.md create mode 100644 clouds/snowflake/modules/sql/h3/H3_POLYFILL_TABLE.sql create mode 100644 clouds/snowflake/modules/test/h3/H3_POLYFILL_TABLE.test.js diff --git a/clouds/bigquery/modules/doc/h3/H3_POLYFILL_MODE.md 
b/clouds/bigquery/modules/doc/h3/H3_POLYFILL_MODE.md index 8b3596ac0..da41d4e68 100644 --- a/clouds/bigquery/modules/doc/h3/H3_POLYFILL_MODE.md +++ b/clouds/bigquery/modules/doc/h3/H3_POLYFILL_MODE.md @@ -6,7 +6,7 @@ H3_POLYFILL_MODE(geog, resolution, mode) **Description** -Returns an array of quadbin cell indexes contained in the given geography at a given level of detail. Containment is determined by the mode: center, intersects, contains. +Returns an array of H3 cell indexes contained in the given geography at a given level of detail. Containment is determined by the mode: center, intersects, contains. * `geog`: `GEOGRAPHY` representing the shape to cover. * `resolution`: `INT64` level of detail. The value must be between 0 and 15 ([H3 resolution table](https://h3geo.org/docs/core-library/restable)). diff --git a/clouds/bigquery/modules/doc/h3/H3_POLYFILL_TABLE.md b/clouds/bigquery/modules/doc/h3/H3_POLYFILL_TABLE.md new file mode 100644 index 000000000..885c23b72 --- /dev/null +++ b/clouds/bigquery/modules/doc/h3/H3_POLYFILL_TABLE.md @@ -0,0 +1,58 @@ +## H3_POLYFILL_TABLE (BETA) + +```sql:signature +H3_POLYFILL_TABLE(input_query, resolution, mode, output_table) +``` + +**Description** + +Returns a table with the H3 cell indexes contained in the given geography at a given level of detail. Containment is determined by the mode: center, intersects, contains. All the attributes except the geography will be included in the output table, clustered by the h3 column. + +* `input_query`: `STRING` input data to polyfill. It must contain a column `geom` with the shape to cover. Additionally, other columns can be included. +* `resolution`: `INT64` level of detail. The value must be between 0 and 15 ([H3 resolution table](https://h3geo.org/docs/core-library/restable)). +* `mode`: `STRING` + * `center` returns the indexes of the H3 cells which centers intersect the input geography (polygon). 
The resulting H3 set does not fully cover the input geography, however, this is **significantly faster** than the other modes. This mode is not compatible with points or lines. Equivalent to [`H3_POLYFILL`](h3#h3_polyfill). + * `intersects` returns the indexes of the H3 cells that intersect the input geography. The resulting H3 set will completely cover the input geography (point, line, polygon). + * `contains` returns the indexes of the H3 cells that are entirely contained inside the input geography (polygon). This mode is not compatible with points or lines. +* `output_table`: `STRING` name of the output table to store the results of the polyfill. + +Mode `center`: + +![](h3_polyfill_mode_center.png) + +Mode `intersects`: + +![](h3_polyfill_mode_intersects.png) + +Mode `contains`: + +![](h3_polyfill_mode_contains.png) + +**Output** + +The results are stored in the table named `<output_table>`, which contains the following columns: + +* `h3`: `STRING` the H3 cell index. +* The rest of the columns included in `input_query` except `geom`. + +**Examples** + +```sql +CALL carto.H3_POLYFILL_TABLE( + "SELECT ST_GEOGFROMTEXT('POLYGON ((-3.71219873428345 40.413365349070865, -3.7144088745117 40.40965661286395, -3.70659828186035 40.409525904775634, -3.71219873428345 40.413365349070865))') AS geom", + 9, 'intersects', + '..' +); +-- The table `..` will be created +-- with column: h3 +``` + +```sql +CALL carto.H3_POLYFILL_TABLE( + 'SELECT geom, name, value FROM `..`', + 9, 'center', + '..' 
+); +-- The table `..` will be created +-- with columns: h3, name, value +``` diff --git a/clouds/bigquery/modules/doc/quadbin/QUADBIN_POLYFILL_TABLE.md b/clouds/bigquery/modules/doc/quadbin/QUADBIN_POLYFILL_TABLE.md new file mode 100644 index 000000000..f3f59728a --- /dev/null +++ b/clouds/bigquery/modules/doc/quadbin/QUADBIN_POLYFILL_TABLE.md @@ -0,0 +1,58 @@ +## QUADBIN_POLYFILL_TABLE (BETA) + +```sql:signature +QUADBIN_POLYFILL_TABLE(input_query, resolution, mode, output_table) +``` + +**Description** + +Returns a table with the quadbin cell indexes contained in the given geography at a given level of detail. Containment is determined by the mode: center, intersects, contains. All the attributes except the geography will be included in the output table, clustered by the quadbin column. + +* `input_query`: `STRING` input data to polyfill. It must contain a column `geom` with the shape to cover. Additionally, other columns can be included. +* `resolution`: `INT64` level of detail. The value must be between 0 and 26. +* `mode`: `STRING` + * `center` returns the indexes of the quadbin cells which centers intersect the input geography (polygon). The resulting quadbin set does not fully cover the input geography, however, this is **significantly faster** than the other modes. This mode is not compatible with points or lines. Equivalent to [`QUADBIN_POLYFILL`](quadbin#quadbin_polyfill). + * `intersects` returns the indexes of the quadbin cells that intersect the input geography. The resulting quadbin set will completely cover the input geography (point, line, polygon). + * `contains` returns the indexes of the quadbin cells that are entirely contained inside the input geography (polygon). This mode is not compatible with points or lines. +* `output_table`: `STRING` name of the output table to store the results of the polyfill. 
+ +Mode `center`: + +![](quadbin_polyfill_mode_center.png) + +Mode `intersects`: + +![](quadbin_polyfill_mode_intersects.png) + +Mode `contains`: + +![](quadbin_polyfill_mode_contains.png) + +**Output** + +The results are stored in the table named `<output_table>`, which contains the following columns: + +* `quadbin`: `INT64` the quadbin cell index. +* The rest of the columns included in `input_query` except `geom`. + +**Examples** + +```sql +CALL carto.QUADBIN_POLYFILL_TABLE( + "SELECT ST_GEOGFROMTEXT('POLYGON ((-3.71219873428345 40.413365349070865, -3.7144088745117 40.40965661286395, -3.70659828186035 40.409525904775634, -3.71219873428345 40.413365349070865))') AS geom", + 12, 'intersects', + '..' +); +-- The table `..` will be created +-- with column: quadbin +``` + +```sql +CALL carto.QUADBIN_POLYFILL_TABLE( + 'SELECT geom, name, value FROM `..
`', + 12, 'center', + '..' +); +-- The table `..` will be created +-- with columns: quadbin, name, value +``` diff --git a/clouds/bigquery/modules/sql/h3/H3_POLYFILL_TABLE.sql b/clouds/bigquery/modules/sql/h3/H3_POLYFILL_TABLE.sql new file mode 100644 index 000000000..846d3feb6 --- /dev/null +++ b/clouds/bigquery/modules/sql/h3/H3_POLYFILL_TABLE.sql @@ -0,0 +1,59 @@ +---------------------------- +-- Copyright (C) 2023 CARTO +---------------------------- + +CREATE OR REPLACE FUNCTION `@@BQ_DATASET@@.__H3_POLYFILL_QUERY` +( + input_query STRING, + resolution INT64, + mode STRING, + output_table STRING +) +RETURNS STRING +DETERMINISTIC +LANGUAGE js +AS """ + if (!['center', 'intersects', 'contains'].includes(mode)) { + throw Error('Invalid mode, should be center, intersects, or contains.') + } + + if (resolution < 0 || resolution > 15) { + throw Error('Invalid resolution, should be between 0 and 15.') + } + + output_table = output_table.replace(/`/g, '') + + const containmentFunction = (mode === 'contains') ? 'ST_CONTAINS' : 'ST_INTERSECTS' + const cellFunction = (mode === 'center') ? 
'@@BQ_DATASET@@.H3_CENTER' : '@@BQ_DATASET@@.H3_BOUNDARY' + + return 'CREATE TABLE `' + output_table + '` CLUSTER BY (h3) AS\\n' + + 'WITH __input AS (' + input_query + '),\\n' + + '__cells AS (SELECT h3, i.* FROM __input AS i,\\n' + + 'UNNEST(`@@BQ_DATASET@@.__H3_POLYFILL_INIT`(geom,`@@BQ_DATASET@@.__H3_POLYFILL_INIT_Z`(geom,' + resolution + '))) AS parent,\\n' + + 'UNNEST(`@@BQ_DATASET@@.H3_TOCHILDREN`(parent,' + resolution + ')) AS h3)\\n' + + 'SELECT * EXCEPT (geom) FROM __cells\\n' + + 'WHERE ' + containmentFunction + '(geom, `' + cellFunction + '`(h3));' +"""; + +CREATE OR REPLACE PROCEDURE `@@BQ_DATASET@@.H3_POLYFILL_TABLE` +( + input_query STRING, + resolution INT64, + mode STRING, + output_table STRING +) +BEGIN + DECLARE polyfill_query STRING; + + -- Validate the output table name and fail if that table already exists + CALL `@@BQ_DATASET@@.__CHECK_TABLE`(output_table); + + SET polyfill_query = `@@BQ_DATASET@@.__H3_POLYFILL_QUERY`( + input_query, + resolution, + mode, + output_table + ); + + EXECUTE IMMEDIATE polyfill_query; +END; diff --git a/clouds/bigquery/modules/sql/quadbin/QUADBIN_POLYFILL_TABLE.sql b/clouds/bigquery/modules/sql/quadbin/QUADBIN_POLYFILL_TABLE.sql new file mode 100644 index 000000000..65dd1f423 --- /dev/null +++ b/clouds/bigquery/modules/sql/quadbin/QUADBIN_POLYFILL_TABLE.sql @@ -0,0 +1,59 @@ +---------------------------- +-- Copyright (C) 2023 CARTO +---------------------------- + +CREATE OR REPLACE FUNCTION `@@BQ_DATASET@@.__QUADBIN_POLYFILL_QUERY` +( + input_query STRING, + resolution INT64, + mode STRING, + output_table STRING +) +RETURNS STRING +DETERMINISTIC +LANGUAGE js +AS """ + if (!['center', 'intersects', 'contains'].includes(mode)) { + throw Error('Invalid mode, should be center, intersects, or contains.') + } + + if (resolution < 0 || resolution > 26) { + throw Error('Invalid resolution, should be between 0 and 26.') + } + + output_table = output_table.replace(/`/g, '') + + const containmentFunction = (mode === 'contains') ? 
'ST_CONTAINS' : 'ST_INTERSECTS' + const cellFunction = (mode === 'center') ? '@@BQ_DATASET@@.QUADBIN_CENTER' : '@@BQ_DATASET@@.QUADBIN_BOUNDARY' + + return 'CREATE TABLE `' + output_table + '` CLUSTER BY (quadbin) AS\\n' + + 'WITH __input AS (' + input_query + '),\\n' + + '__cells AS (SELECT quadbin, i.* FROM __input AS i,\\n' + + 'UNNEST(`@@BQ_DATASET@@.__QUADBIN_POLYFILL_INIT`(geom,`@@BQ_DATASET@@.__QUADBIN_POLYFILL_INIT_Z`(geom,' + resolution + '))) AS parent,\\n' + + 'UNNEST(`@@BQ_DATASET@@.QUADBIN_TOCHILDREN`(parent,' + resolution + ')) AS quadbin)\\n' + + 'SELECT * EXCEPT (geom) FROM __cells\\n' + + 'WHERE ' + containmentFunction + '(geom, `' + cellFunction + '`(quadbin));' +"""; + +CREATE OR REPLACE PROCEDURE `@@BQ_DATASET@@.QUADBIN_POLYFILL_TABLE` +( + input_query STRING, + resolution INT64, + mode STRING, + output_table STRING +) +BEGIN + DECLARE polyfill_query STRING; + + -- Validate the output table name and fail if that table already exists + CALL `@@BQ_DATASET@@.__CHECK_TABLE`(output_table); + + SET polyfill_query = `@@BQ_DATASET@@.__QUADBIN_POLYFILL_QUERY`( + input_query, + resolution, + mode, + output_table + ); + + EXECUTE IMMEDIATE polyfill_query; +END; diff --git a/clouds/bigquery/modules/sql/utils/__CHECK_TABLE.sql b/clouds/bigquery/modules/sql/utils/__CHECK_TABLE.sql new file mode 100644 index 000000000..6a6fabf7e --- /dev/null +++ b/clouds/bigquery/modules/sql/utils/__CHECK_TABLE.sql @@ -0,0 +1,33 @@ +--------------------------------- +-- Copyright (C) 2020-2021 CARTO +--------------------------------- + +CREATE OR REPLACE PROCEDURE `@@BQ_DATASET@@.__CHECK_TABLE` +(destination_table STRING) +BEGIN + DECLARE destination_parts DEFAULT (SELECT `@@BQ_DATASET@@.__TABLENAME_SPLIT`(destination_table)); + DECLARE tables_metadata STRING; + DECLARE table_name STRING; + DECLARE num_tables INT64; + + IF destination_parts IS NULL OR destination_parts.table IS NULL OR destination_parts.dataset IS NULL THEN + SELECT ERROR("The output table does not have a correct format, i.e. 
[projectID].dataset.tablename. Please, use a different output table name and try again."); + END IF; + + SET table_name = destination_parts.table; + SET tables_metadata = `@@BQ_DATASET@@.__TABLENAME_JOIN`((destination_parts.project, destination_parts.dataset, '__TABLES__')); + + EXECUTE IMMEDIATE FORMAT( + ''' + SELECT COUNT(size_bytes) + FROM %s + WHERE table_id='%s' + ''', + tables_metadata, + table_name + ) INTO num_tables; + + IF num_tables > 0 THEN + SELECT ERROR("The output table to store the tileset already exists. Please, use a different output table name and try again."); + END IF; +END; diff --git a/clouds/bigquery/modules/sql/utils/__TABLENAME_JOIN.sql b/clouds/bigquery/modules/sql/utils/__TABLENAME_JOIN.sql new file mode 100644 index 000000000..34d456290 --- /dev/null +++ b/clouds/bigquery/modules/sql/utils/__TABLENAME_JOIN.sql @@ -0,0 +1,14 @@ +---------------------------- +-- Copyright (C) 2021 CARTO +---------------------------- + +CREATE OR REPLACE FUNCTION `@@BQ_DATASET@@.__TABLENAME_JOIN` +(split_name STRUCT<project STRING, dataset STRING, table STRING>) +RETURNS STRING +AS ( + IF( + split_name.project IS NULL, + FORMAT('`%s`.`%s`', split_name.dataset, split_name.table), + FORMAT('`%s`.`%s`.`%s`', split_name.project, split_name.dataset, split_name.table) + ) +); diff --git a/clouds/bigquery/modules/sql/utils/__TABLENAME_SPLIT.sql b/clouds/bigquery/modules/sql/utils/__TABLENAME_SPLIT.sql new file mode 100644 index 000000000..91bfd9d52 --- /dev/null +++ b/clouds/bigquery/modules/sql/utils/__TABLENAME_SPLIT.sql @@ -0,0 +1,16 @@ +---------------------------- +-- Copyright (C) 2021 CARTO +---------------------------- + +CREATE OR REPLACE FUNCTION `@@BQ_DATASET@@.__TABLENAME_SPLIT` +(qualified_name STRING) +RETURNS STRUCT<project STRING, dataset STRING, table STRING> +AS (( + WITH unquoted AS (SELECT REPLACE(qualified_name, "`", "") AS name) + + SELECT AS STRUCT + REGEXP_EXTRACT(name, r"^(.+)\..+\..+$") AS project, + COALESCE(REGEXP_EXTRACT(name, r"^.+\.(.+)\..+$"), REGEXP_EXTRACT(name, r"^(.+)\..+$")) AS dataset, + REGEXP_EXTRACT(name, 
r"^.+\.(.+)$") AS table + FROM unquoted +)); diff --git a/clouds/bigquery/modules/test/h3/H3_POLYFILL_TABLE.test.js b/clouds/bigquery/modules/test/h3/H3_POLYFILL_TABLE.test.js new file mode 100644 index 000000000..9675f27f7 --- /dev/null +++ b/clouds/bigquery/modules/test/h3/H3_POLYFILL_TABLE.test.js @@ -0,0 +1,20 @@ +const { runQuery } = require('../../../common/test-utils'); + +const BQ_DATASET = process.env.BQ_DATASET; + +test('H3_POLYFILL_TABLE should generate the correct query', async () => { + const query = `SELECT \`@@BQ_DATASET@@.__H3_POLYFILL_QUERY\`( + 'SELECT geom, name, value FROM \`..
\`', + 12, 'center', + '..' + ) AS output`; + const rows = await runQuery(query); + expect(rows.length).toEqual(1); + expect(rows[0].output).toEqual(`CREATE TABLE \`..\` CLUSTER BY (h3) AS +WITH __input AS (SELECT geom, name, value FROM \`..
\`), +__cells AS (SELECT h3, i.* FROM __input AS i, +UNNEST(\`@@BQ_DATASET@@.__H3_POLYFILL_INIT\`(geom,\`@@BQ_DATASET@@.__H3_POLYFILL_INIT_Z\`(geom,12))) AS parent, +UNNEST(\`@@BQ_DATASET@@.H3_TOCHILDREN\`(parent,12)) AS h3) +SELECT * EXCEPT (geom) FROM __cells +WHERE ST_INTERSECTS(geom, \`@@BQ_DATASET@@.H3_CENTER\`(h3));`.replace(/@@BQ_DATASET@@/g, BQ_DATASET)); +}); diff --git a/clouds/bigquery/modules/test/quadbin/QUADBIN_POLYFILL_TABLE.test.js b/clouds/bigquery/modules/test/quadbin/QUADBIN_POLYFILL_TABLE.test.js new file mode 100644 index 000000000..ff444e35f --- /dev/null +++ b/clouds/bigquery/modules/test/quadbin/QUADBIN_POLYFILL_TABLE.test.js @@ -0,0 +1,20 @@ +const { runQuery } = require('../../../common/test-utils'); + +const BQ_DATASET = process.env.BQ_DATASET; + +test('QUADBIN_POLYFILL_TABLE should generate the correct query', async () => { + const query = `SELECT \`@@BQ_DATASET@@.__QUADBIN_POLYFILL_QUERY\`( + 'SELECT geom, name, value FROM \`..
\`', + 12, 'center', + '..' + ) AS output`; + const rows = await runQuery(query); + expect(rows.length).toEqual(1); + expect(rows[0].output).toEqual(`CREATE TABLE \`..\` CLUSTER BY (quadbin) AS +WITH __input AS (SELECT geom, name, value FROM \`..
\`), +__cells AS (SELECT quadbin, i.* FROM __input AS i, +UNNEST(\`@@BQ_DATASET@@.__QUADBIN_POLYFILL_INIT\`(geom,\`@@BQ_DATASET@@.__QUADBIN_POLYFILL_INIT_Z\`(geom,12))) AS parent, +UNNEST(\`@@BQ_DATASET@@.QUADBIN_TOCHILDREN\`(parent,12)) AS quadbin) +SELECT * EXCEPT (geom) FROM __cells +WHERE ST_INTERSECTS(geom, \`@@BQ_DATASET@@.QUADBIN_CENTER\`(quadbin));`.replace(/@@BQ_DATASET@@/g, BQ_DATASET)); +}); diff --git a/clouds/snowflake/modules/doc/h3/H3_POLYFILL_TABLE.md b/clouds/snowflake/modules/doc/h3/H3_POLYFILL_TABLE.md new file mode 100644 index 000000000..4378802bf --- /dev/null +++ b/clouds/snowflake/modules/doc/h3/H3_POLYFILL_TABLE.md @@ -0,0 +1,58 @@ +## H3_POLYFILL_TABLE (BETA) + +```sql:signature +H3_POLYFILL_TABLE(input_query, resolution, mode, output_table) +``` + +**Description** + +Returns a table with the H3 cell indexes contained in the given geography at a given level of detail. Containment is determined by the mode: center, intersects, contains. All the attributes except the geography will be included in the output table, clustered by the h3 column. + +* `input_query`: `STRING` input data to polyfill. It must contain a column `geom` with the shape to cover. Additionally, other columns can be included. +* `resolution`: `INT` level of detail. The value must be between 0 and 15 ([H3 resolution table](https://h3geo.org/docs/core-library/restable)). +* `mode`: `STRING` + * `center` returns the indexes of the H3 cells which centers intersect the input geography (polygon). The resulting H3 set does not fully cover the input geography, however, this is **significantly faster** than the other modes. This mode is not compatible with points or lines. Equivalent to [`H3_POLYFILL`](h3#h3_polyfill). + * `intersects` returns the indexes of the H3 cells that intersect the input geography. The resulting H3 set will completely cover the input geography (point, line, polygon). 
+ * `contains` returns the indexes of the H3 cells that are entirely contained inside the input geography (polygon). This mode is not compatible with points or lines. +* `output_table`: `STRING` name of the output table to store the results of the polyfill. + +Mode `center`: + +![](h3_polyfill_mode_center.png) + +Mode `intersects`: + +![](h3_polyfill_mode_intersects.png) + +Mode `contains`: + +![](h3_polyfill_mode_contains.png) + +**Output** + +The results are stored in the table named `<output_table>`, which contains the following columns: + +* `h3`: `STRING` the H3 cell index. +* The rest of the columns included in `input_query` except `geom`. + +**Examples** + +```sql +CALL carto.H3_POLYFILL_TABLE( + 'SELECT TO_GEOGRAPHY(''POLYGON ((-3.71219873428345 40.413365349070865, -3.7144088745117 40.40965661286395, -3.70659828186035 40.409525904775634, -3.71219873428345 40.413365349070865))'') AS geom', + 9, 'intersects', + '..' +); +-- The table `..` will be created +-- with column: h3 +``` + +```sql +CALL carto.H3_POLYFILL_TABLE( + 'SELECT geom, name, value FROM `..
`', + 9, 'center', + '..' +); +-- The table `..` will be created +-- with columns: h3, name, value +``` diff --git a/clouds/snowflake/modules/sql/h3/H3_POLYFILL_TABLE.sql b/clouds/snowflake/modules/sql/h3/H3_POLYFILL_TABLE.sql new file mode 100644 index 000000000..67442c591 --- /dev/null +++ b/clouds/snowflake/modules/sql/h3/H3_POLYFILL_TABLE.sql @@ -0,0 +1,182 @@ +---------------------------- +-- Copyright (C) 2023 CARTO +---------------------------- + +CREATE OR REPLACE FUNCTION @@SF_SCHEMA@@._H3_POLYFILL_GEOJSON +(geojson STRING, input_resolution DOUBLE) +RETURNS ARRAY +LANGUAGE JAVASCRIPT +IMMUTABLE +AS $$ + if (!GEOJSON || INPUT_RESOLUTION == null) { + return []; + } + + @@SF_LIBRARY_H3_POLYFILL@@ + + const resolution = Number(INPUT_RESOLUTION); + if (resolution < 0 || resolution > 15) { + return []; + } + + const bboxA = [-180, -90, 0, 90] + const bboxB = [0, -90, 180, 90] + const featureGeometry = JSON.parse(GEOJSON) + let polygonCoordinatesA = []; + let polygonCoordinatesB = []; + switch(featureGeometry.type) { + case 'GeometryCollection': + featureGeometry.geometries.forEach(function (geom) { + if (geom.type === 'MultiPolygon') { + var clippedGeometryA = h3PolyfillLib.bboxClip(geom, bboxA).geometry; + polygonCoordinatesA = polygonCoordinatesA.concat(clippedGeometryA.coordinates); + var clippedGeometryB = h3PolyfillLib.bboxClip(geom, bboxB).geometry; + polygonCoordinatesB = polygonCoordinatesB.concat(clippedGeometryB.coordinates); + } else if (geom.type === 'Polygon') { + var clippedGeometryA = h3PolyfillLib.bboxClip(geom, bboxA).geometry; + polygonCoordinatesA = polygonCoordinatesA.concat([clippedGeometryA.coordinates]); + var clippedGeometryB = h3PolyfillLib.bboxClip(geom, bboxB).geometry; + polygonCoordinatesB = polygonCoordinatesB.concat([clippedGeometryB.coordinates]); + } + }); + break; + case 'MultiPolygon': + var clippedGeometryA = h3PolyfillLib.bboxClip(featureGeometry, bboxA).geometry; + polygonCoordinatesA = clippedGeometryA.coordinates; + var 
clippedGeometryB = h3PolyfillLib.bboxClip(featureGeometry, bboxB).geometry; + polygonCoordinatesB = clippedGeometryB.coordinates; + break; + case 'Polygon': + var clippedGeometryA = h3PolyfillLib.bboxClip(featureGeometry, bboxA).geometry; + polygonCoordinatesA = [clippedGeometryA.coordinates]; + var clippedGeometryB = h3PolyfillLib.bboxClip(featureGeometry, bboxB).geometry; + polygonCoordinatesB = [clippedGeometryB.coordinates]; + break; + default: + return []; + } + + if (polygonCoordinatesA.length + polygonCoordinatesB.length === 0) { + return []; + } + + let hexesA = polygonCoordinatesA.reduce( + (acc, coordinates) => acc.concat(h3PolyfillLib.polyfill(coordinates, resolution, true)), + [] + ).filter(h => h != null); + let hexesB = polygonCoordinatesB.reduce( + (acc, coordinates) => acc.concat(h3PolyfillLib.polyfill(coordinates, resolution, true)), + [] + ).filter(h => h != null); + let hexes = [...hexesA, ...hexesB]; // declared locally: an undeclared assignment would create an implicit global + hexes = [...new Set(hexes)]; + + return hexes; +$$; + +CREATE OR REPLACE FUNCTION @@SF_SCHEMA@@._H3_AVG_EDGE_LENGTH +(resolution INTEGER) +RETURNS DOUBLE +IMMUTABLE +AS $$ + SELECT CASE resolution + WHEN 0 THEN CAST(1281256.011 AS DOUBLE) + WHEN 1 THEN CAST(483056.8391 AS DOUBLE) + WHEN 2 THEN CAST(182512.9565 AS DOUBLE) + WHEN 3 THEN CAST(68979.22179 AS DOUBLE) + WHEN 4 THEN CAST(26071.75968 AS DOUBLE) + WHEN 5 THEN CAST(9854.090990 AS DOUBLE) + WHEN 6 THEN CAST(3724.532667 AS DOUBLE) + WHEN 7 THEN CAST(1406.475763 AS DOUBLE) + WHEN 8 THEN CAST(531.414010 AS DOUBLE) + WHEN 9 THEN CAST(200.786148 AS DOUBLE) + WHEN 10 THEN CAST(75.863783 AS DOUBLE) + WHEN 11 THEN CAST(28.663897 AS DOUBLE) + WHEN 12 THEN CAST(10.830188 AS DOUBLE) + WHEN 13 THEN CAST(4.092010 AS DOUBLE) + WHEN 14 THEN CAST(1.546100 AS DOUBLE) + WHEN 15 THEN CAST(0.584169 AS DOUBLE) + ELSE + NULL + END +$$; + +CREATE OR REPLACE FUNCTION @@SF_SCHEMA@@._H3_POLYFILL_INIT +(geog GEOGRAPHY, resolution INTEGER) +RETURNS ARRAY +IMMUTABLE +AS $$ + SELECT @@SF_SCHEMA@@._H3_POLYFILL_GEOJSON( + 
CAST( + ST_ASGEOJSON( + @@SF_SCHEMA@@.ST_BUFFER( + geog, + @@SF_SCHEMA@@._H3_AVG_EDGE_LENGTH(resolution) + ) + ) AS STRING), + CAST(resolution AS DOUBLE) + ) +$$; + +CREATE OR REPLACE FUNCTION @@SF_SCHEMA@@._H3_POLYFILL_QUERY +( + input_query STRING, + resolution DOUBLE, + mode STRING, + output_table STRING +) +RETURNS STRING +LANGUAGE JAVASCRIPT +IMMUTABLE +AS $$ + if (!['center', 'intersects', 'contains'].includes(MODE)) { + throw Error('Invalid mode, should be center, intersects, or contains.'); + } + + if (RESOLUTION < 0 || RESOLUTION > 15) { + throw Error('Invalid resolution, should be between 0 and 15.'); + } + + const containmentFunction = (MODE === 'contains') ? 'ST_CONTAINS' : 'ST_INTERSECTS'; + const cellFunction = (MODE === 'center') ? '@@SF_SCHEMA@@.H3_CENTER' : '@@SF_SCHEMA@@.H3_BOUNDARY'; + + const parentResolution = Math.max(0, RESOLUTION - 4) + + return ` + CREATE OR REPLACE TABLE ${OUTPUT_TABLE} CLUSTER BY (h3) AS + WITH __input AS (${INPUT_QUERY}), + __cells AS ( + SELECT CAST(children.value AS STRING) AS h3, i.* + FROM __input AS i, + TABLE(FLATTEN(@@SF_SCHEMA@@._H3_POLYFILL_INIT(geom, ${parentResolution}))) AS parent, + TABLE(FLATTEN(@@SF_SCHEMA@@.H3_TOCHILDREN(CAST(parent.value AS STRING), ${RESOLUTION}))) AS children + ) + SELECT * EXCLUDE(geom) + FROM __cells + WHERE ${containmentFunction}(geom, ${cellFunction}(h3)) + `; +$$; + +CREATE OR REPLACE PROCEDURE @@SF_SCHEMA@@.H3_POLYFILL_TABLE +( + input_query STRING, + resolution INT, + mode STRING, + output_table STRING +) +RETURNS STRING +LANGUAGE SQL +EXECUTE AS CALLER +AS $$ + DECLARE polyfill_query STRING; + BEGIN + polyfill_query := (SELECT @@SF_SCHEMA@@._H3_POLYFILL_QUERY( + :input_query, + CAST(:resolution AS DOUBLE), + :mode, + :output_table + )); + EXECUTE IMMEDIATE polyfill_query; + RETURN 'Polyfill completed.'; + END; +$$; diff --git a/clouds/snowflake/modules/test/h3/H3_POLYFILL_TABLE.test.js b/clouds/snowflake/modules/test/h3/H3_POLYFILL_TABLE.test.js new file mode 100644 index 
000000000..2c1b309d7 --- /dev/null +++ b/clouds/snowflake/modules/test/h3/H3_POLYFILL_TABLE.test.js @@ -0,0 +1,26 @@ +const { runQuery } = require('../../../common/test-utils'); + +const SF_SCHEMA = process.env.SF_SCHEMA; + +test('H3_POLYFILL_TABLE should generate the correct query', async () => { + const query = `SELECT @@SF_SCHEMA@@._H3_POLYFILL_QUERY( + 'SELECT geom, name, value FROM ..
', + 12, 'center', + '..' + ) AS output`; + const rows = await runQuery(query); + expect(rows.length).toEqual(1); + expect(rows[0].OUTPUT).toEqual(` + CREATE OR REPLACE TABLE .. CLUSTER BY (h3) AS + WITH __input AS (SELECT geom, name, value FROM ..
), + __cells AS ( + SELECT CAST(children.value AS STRING) AS h3, i.* + FROM __input AS i, + TABLE(FLATTEN(@@SF_SCHEMA@@._H3_POLYFILL_INIT(geom, 8))) AS parent, + TABLE(FLATTEN(@@SF_SCHEMA@@.H3_TOCHILDREN(CAST(parent.value AS STRING), 12))) AS children + ) + SELECT * EXCLUDE(geom) + FROM __cells + WHERE ST_INTERSECTS(geom, @@SF_SCHEMA@@.H3_CENTER(h3)) + `.replace(/@@SF_SCHEMA@@/g, SF_SCHEMA)); +}); \ No newline at end of file