-
-
Notifications
You must be signed in to change notification settings - Fork 309
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
v.dissolve: Compute attribute aggregate statistics (#2388)
In addition to geometry dissolving, compute aggregate statistics for the attribute values of dissolved features with v.db.univar and SQL. v.db.select with group is used to obtain unique values of the column the dissolving is based on. Add column and update now happens for every value, column, and statistics. Originally implemented with v.db.univar only because it has a good set of functions, but direct SQL is faster and potentially can have more functions (although default SQLite has less). Auto-generates names and combinations of column-method for convenience, but when all needed parameters are provided, uses them as is. Has documentation, examples, image for original functionality, and test (image generated in notebook). Uses plural for columns and methods. Removes duplicate columns and methods for non-explicit automatic (interactive) result column handling. Support SQL expressions as columns (as in v.db.update query_column or v.db.select columns). Supports general SQL syntax just like v.db.select for the price of less checks. Supports also text-returning aggregate functions and functions with multiple parameters such as SQLite group_concat. Supports any layer, not just 1, for attributes. Uses a simple SQL escape function to double single quotes. Requires v.db.univar JSON output and v.db.select column info in JSON output. Handles cleanup from the main function. Removes global variables. Uses PID and node name for the temporary vector. Partially modernizes the existing code by using gs alias instead of grass alias. Improves author lists.
- Loading branch information
1 parent
3083e78
commit 9d44603
Showing
10 changed files
with
2,018 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,248 @@ | ||
"""Fixtures for v.dissolve tests""" | ||
|
||
from types import SimpleNamespace | ||
|
||
import pytest | ||
|
||
import grass.script as gs | ||
import grass.script.setup as grass_setup | ||
|
||
|
||
def updates_as_transaction(table, cat_column, column, column_quote, cats, values): | ||
"""Create SQL statement for categories and values for a given column""" | ||
sql = ["BEGIN TRANSACTION"] | ||
if column_quote: | ||
quote = "'" | ||
else: | ||
quote = "" | ||
for cat, value in zip(cats, values): | ||
sql.append( | ||
f"UPDATE {table} SET {column} = {quote}{value}{quote} " | ||
f"WHERE {cat_column} = {cat};" | ||
) | ||
sql.append("END TRANSACTION") | ||
return "\n".join(sql) | ||
|
||
|
||
def value_update_by_category(map_name, layer, column_name, cats, values): | ||
"""Update column value for multiple rows based on category""" | ||
db_info = gs.vector_db(map_name)[layer] | ||
table = db_info["table"] | ||
database = db_info["database"] | ||
driver = db_info["driver"] | ||
cat_column = "cat" | ||
column_type = gs.vector_columns(map_name, layer)[column_name] | ||
column_quote = bool(column_type["type"] in ("CHARACTER", "TEXT")) | ||
sql = updates_as_transaction( | ||
table=table, | ||
cat_column=cat_column, | ||
column=column_name, | ||
column_quote=column_quote, | ||
cats=cats, | ||
values=values, | ||
) | ||
gs.write_command( | ||
"db.execute", input="-", database=database, driver=driver, stdin=sql | ||
) | ||
|
||
|
||
@pytest.fixture(scope="module") | ||
def dataset(tmp_path_factory): | ||
"""Creates a session with a mapset which has vector with a float column""" | ||
tmp_path = tmp_path_factory.mktemp("dataset") | ||
location = "test" | ||
point_map_name = "points" | ||
map_name = "areas" | ||
int_column_name = "int_value" | ||
float_column_name = "double_value" | ||
str_column_name = "str_value" | ||
|
||
cats = [1, 2, 3, 4, 5, 6] | ||
int_values = [10, 10, 10, 5, 24, 5] | ||
float_values = [100.78, 102.78, 109.78, 104.78, 103.78, 105.78] | ||
str_values = ["apples", "oranges", "oranges", "plumbs", "oranges", "plumbs"] | ||
num_points = len(cats) | ||
|
||
gs.core._create_location_xy(tmp_path, location) # pylint: disable=protected-access | ||
with grass_setup.init(tmp_path / location): | ||
gs.run_command("g.region", s=0, n=80, w=0, e=120, b=0, t=50, res=10, res3=10) | ||
gs.run_command("v.random", output=point_map_name, npoints=num_points, seed=42) | ||
gs.run_command("v.voronoi", input=point_map_name, output=map_name) | ||
gs.run_command( | ||
"v.db.addtable", | ||
map=map_name, | ||
columns=[ | ||
f"{int_column_name} integer", | ||
f"{float_column_name} double precision", | ||
f"{str_column_name} text", | ||
], | ||
) | ||
value_update_by_category( | ||
map_name=map_name, | ||
layer=1, | ||
column_name=int_column_name, | ||
cats=cats, | ||
values=int_values, | ||
) | ||
value_update_by_category( | ||
map_name=map_name, | ||
layer=1, | ||
column_name=float_column_name, | ||
cats=cats, | ||
values=float_values, | ||
) | ||
value_update_by_category( | ||
map_name=map_name, | ||
layer=1, | ||
column_name=str_column_name, | ||
cats=cats, | ||
values=str_values, | ||
) | ||
yield SimpleNamespace( | ||
vector_name=map_name, | ||
int_column_name=int_column_name, | ||
int_values=int_values, | ||
float_column_name=float_column_name, | ||
float_values=float_values, | ||
str_column_name=str_column_name, | ||
str_column_values=str_values, | ||
) | ||
|
||
|
||
@pytest.fixture(scope="module") | ||
def discontinuous_dataset(tmp_path_factory): | ||
"""Creates a session with a mapset which has vector with a float column""" | ||
tmp_path = tmp_path_factory.mktemp("discontinuous_dataset") | ||
location = "test" | ||
point_map_name = "points" | ||
map_name = "areas" | ||
int_column_name = "int_value" | ||
float_column_name = "double_value" | ||
str_column_name = "str_value" | ||
|
||
cats = [1, 2, 3, 4, 5, 6] | ||
int_values = [10, 12, 10, 5, 24, 24] | ||
float_values = [100.78, 102.78, 109.78, 104.78, 103.78, 105.78] | ||
str_values = ["apples", "plumbs", "apples", "plumbs", "oranges", "oranges"] | ||
num_points = len(cats) | ||
|
||
gs.core._create_location_xy(tmp_path, location) # pylint: disable=protected-access | ||
with grass_setup.init(tmp_path / location): | ||
gs.run_command("g.region", s=0, n=80, w=0, e=120, b=0, t=50, res=10, res3=10) | ||
gs.run_command("v.random", output=point_map_name, npoints=num_points, seed=42) | ||
gs.run_command("v.voronoi", input=point_map_name, output=map_name) | ||
gs.run_command( | ||
"v.db.addtable", | ||
map=map_name, | ||
columns=[ | ||
f"{int_column_name} integer", | ||
f"{float_column_name} double precision", | ||
f"{str_column_name} text", | ||
], | ||
) | ||
value_update_by_category( | ||
map_name=map_name, | ||
layer=1, | ||
column_name=int_column_name, | ||
cats=cats, | ||
values=int_values, | ||
) | ||
value_update_by_category( | ||
map_name=map_name, | ||
layer=1, | ||
column_name=float_column_name, | ||
cats=cats, | ||
values=float_values, | ||
) | ||
value_update_by_category( | ||
map_name=map_name, | ||
layer=1, | ||
column_name=str_column_name, | ||
cats=cats, | ||
values=str_values, | ||
) | ||
yield SimpleNamespace( | ||
vector_name=map_name, | ||
int_column_name=int_column_name, | ||
int_values=int_values, | ||
float_column_name=float_column_name, | ||
float_values=float_values, | ||
str_column_name=str_column_name, | ||
str_column_values=str_values, | ||
) | ||
|
||
|
||
@pytest.fixture(scope="module") | ||
def dataset_layer_2(tmp_path_factory): | ||
"""Creates a session with a mapset which has vector with a float column""" | ||
tmp_path = tmp_path_factory.mktemp("dataset_layer_2") | ||
location = "test" | ||
point_map_name = "points" | ||
point_map_name_layer_2 = "points2" | ||
map_name = "areas" | ||
int_column_name = "int_value" | ||
float_column_name = "double_value" | ||
str_column_name = "str_value" | ||
|
||
cats = [1, 2, 3, 4, 5, 6] | ||
int_values = [10, 10, 10, 5, 24, 5] | ||
float_values = [100.78, 102.78, 109.78, 104.78, 103.78, 105.78] | ||
str_values = ["apples", "oranges", "oranges", "plumbs", "oranges", "plumbs"] | ||
num_points = len(cats) | ||
|
||
layer = 2 | ||
|
||
gs.core._create_location_xy(tmp_path, location) # pylint: disable=protected-access | ||
with grass_setup.init(tmp_path / location): | ||
gs.run_command("g.region", s=0, n=80, w=0, e=120, b=0, t=50, res=10, res3=10) | ||
gs.run_command("v.random", output=point_map_name, npoints=num_points, seed=42) | ||
gs.run_command( | ||
"v.category", | ||
input=point_map_name, | ||
layer=[1, layer], | ||
output=point_map_name_layer_2, | ||
option="transfer", | ||
) | ||
gs.run_command( | ||
"v.voronoi", input=point_map_name_layer_2, layer=layer, output=map_name | ||
) | ||
gs.run_command( | ||
"v.db.addtable", | ||
map=map_name, | ||
layer=layer, | ||
columns=[ | ||
f"{int_column_name} integer", | ||
f"{float_column_name} double precision", | ||
f"{str_column_name} text", | ||
], | ||
) | ||
value_update_by_category( | ||
map_name=map_name, | ||
layer=layer, | ||
column_name=int_column_name, | ||
cats=cats, | ||
values=int_values, | ||
) | ||
value_update_by_category( | ||
map_name=map_name, | ||
layer=layer, | ||
column_name=float_column_name, | ||
cats=cats, | ||
values=float_values, | ||
) | ||
value_update_by_category( | ||
map_name=map_name, | ||
layer=layer, | ||
column_name=str_column_name, | ||
cats=cats, | ||
values=str_values, | ||
) | ||
yield SimpleNamespace( | ||
vector_name=map_name, | ||
int_column_name=int_column_name, | ||
int_values=int_values, | ||
float_column_name=float_column_name, | ||
float_values=float_values, | ||
str_column_name=str_column_name, | ||
str_column_values=str_values, | ||
) |
Oops, something went wrong.