
Commit

Support general SQL syntax just like v.db.select for the price of fewer checks. Now depends on v.db.select producing the list of column names #3090. Test and example included.
wenzeslaus committed Jul 18, 2023
1 parent 568b1cb commit 7cd9500
Showing 3 changed files with 87 additions and 10 deletions.
30 changes: 29 additions & 1 deletion scripts/v.dissolve/tests/v_dissolve_aggregate_test.py
@@ -245,7 +245,7 @@ def test_sqlite_agg_accepted(dataset):


def test_sqlite_concat(dataset):
"""SQLite concat text-returning aggregate function works"""
"""SQLite group concat text-returning aggregate function works"""
dissolved_vector = "test_sqlite_concat"
gs.run_command(
"v.dissolve",
@@ -271,6 +271,34 @@ def test_sqlite_concat(dataset):
assert sorted(expected.split(",")) == sorted(actual.split(","))


def test_sqlite_concat_with_two_parameters(dataset):
"""SQLite group concat text-returning two-parameter aggregate function works"""
dissolved_vector = "test_sqlite_concat_separator"
separator = "--+--"
gs.run_command(
"v.dissolve",
input=dataset.vector_name,
column=dataset.str_column_name,
output=dissolved_vector,
aggregate_column=f"group_concat({dataset.int_column_name}, '{separator}')",
result_column="concat_values text",
aggregate_backend="sql",
)
records = json.loads(
gs.read_command(
"v.db.select",
map=dissolved_vector,
format="json",
)
)["records"]
# The order of records is not guaranteed, so both lists are sorted.
# The order of values within group_concat output is defined as arbitrary by SQLite.
expected_integers = sorted(["10", "10,10,24", "5,5"])
actual_integers = sorted([record["concat_values"] for record in records])
for expected, actual in zip(expected_integers, actual_integers):
assert sorted(expected.split(",")) == sorted(actual.split(separator))


def test_duplicate_columns_and_methods_accepted(dataset):
"""Duplicate aggregate columns and methods are accepted and deduplicated"""
dissolved_vector = "test_duplicates"
48 changes: 45 additions & 3 deletions scripts/v.dissolve/v.dissolve.html
@@ -55,7 +55,7 @@ <h3>Attribute aggregation</h3>
<b>aggregate_columns</b> can contain SQL syntax specifying both columns
and the functions applied, e.g.,
<code>aggregate_columns="sum(cows) / sum(animals)"</code>.
In this case, <b>aggregate_methods</b> needs to be omitted.
In this case, <b>aggregate_methods</b> should be omitted.
This provides the highest flexibility and it is suitable for scripting.
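
<p>
For illustration, a call using this SQL syntax might look like the following
sketch; <code>cows</code> and <code>animals</code> are the columns from the example
above, while the input and output map names, the dissolve column, and the result
column are purely hypothetical:

<div class="code"><pre>
v.dissolve input=farms column=owner output=farms_dissolved \
    aggregate_columns="sum(cows) / sum(animals)" \
    result_columns="animal_ratio double precision"
</pre></div>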

<p>
@@ -168,7 +168,7 @@ <h3>Attribute aggregation</h3>
aggregate_columns=ACRES
</pre></div>

The above will create multiple columns for each of the statistics computed
To inspect the result, we will use <em>v.db.select</em> to retrieve only the row
for <code>DOTURBAN_N == 'Wadesboro'</code>:

<div class="code"><pre>
v.db.select municipalities where="DOTURBAN_N == 'Wadesboro'" separator=tab
</pre></div>

The resulting table may look like this:

<div class="code"><pre>
cat DOTURBAN_N ACRES_n ACRES_min ACRES_max ACRES_mean ACRES_sum
66 Wadesboro 2 634.987 3935.325 2285.156 4570.312
</pre></div>

The above created multiple columns for each of the statistics computed
by default. We can limit the number of statistics computed by specifying
the method which should be used:

@@ -221,7 +235,8 @@ <h3>Aggregating multiple attributes</h3>

While it is often not necessary to specify aggregate methods or names for
interactive exploratory analysis, specifying both <b>aggregate_methods</b>
and <b>result_columns</b> manually is a best practice for scripting.
and <b>result_columns</b> manually is a best practice for scripting
(unless SQL syntax is used for <b>aggregate_columns</b>, see below).
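
<p>
A sketch of such a fully specified call, assuming the <code>boundary_municp</code>
map used in the examples above (the output map name and the result column name
are only illustrative):

<div class="code"><pre>
v.dissolve input=boundary_municp column=DOTURBAN_N output=municipalities_sum \
    aggregate_columns=ACRES aggregate_methods=sum result_columns=acres_sum
</pre></div>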

<h3>Aggregating using SQL syntax</h3>

@@ -251,6 +266,33 @@ <h3>Aggregating using SQL syntax</h3>
Here, <em>v.dissolve</em> doesn't make any assumptions about the resulting
column types, so we specified both the name and the type of each column.

<p>
When working with the general SQL syntax, <em>v.dissolve</em> turns off its checks on the
number of aggregate and result columns so that any SQL syntax can be used for the
aggregate columns. This also allows functions with multiple parameters, for example
specifying the separator to be used with <em>group_concat</em>:

<div class="code"><pre>
v.dissolve input=boundary_municp column=DOTURBAN_N output=municipalities_7 \
aggregate_columns="group_concat(MB_NAME, ';')" \
result_columns="names TEXT"
</pre></div>

To inspect the result, we will use <em>v.db.select</em> to retrieve only the row
for <code>DOTURBAN_N == 'Wadesboro'</code>:

<div class="code"><pre>
v.db.select municipalities_7 where="DOTURBAN_N == 'Wadesboro'" separator=tab
</pre></div>

The resulting table may look like this:

<div class="code"><pre>
cat DOTURBAN_N names
66 Wadesboro Wadesboro;Lilesville
</pre></div>


<h2>SEE ALSO</h2>

<em>
19 changes: 13 additions & 6 deletions scripts/v.dissolve/v.dissolve.py
@@ -326,7 +326,12 @@ def create_or_check_result_columns_or_fatal(
methods=len(methods),
)
)
if len(result_columns) != len(columns_to_aggregate):
# When methods are not set with the sql backend, the columns may contain general
# SQL syntax which we cannot easily parse, so we do not check the numbers of
# columns here.
if (methods or backend != "sql") and len(result_columns) != len(
columns_to_aggregate
):
gs.fatal(
_(
"The number of result columns ({result_columns}) needs to be "
@@ -383,7 +388,7 @@ def aggregate_attributes_sql(
result_columns,
):
"""Aggregate values in selected columns grouped by column using SQL backend"""
if len(columns_to_aggregate) != len(result_columns):
if methods and len(columns_to_aggregate) != len(result_columns):
raise ValueError(
"Number of columns_to_aggregate and result_columns must be the same"
)
@@ -407,7 +412,7 @@ def aggregate_attributes_sql(
select_columns = columns_to_aggregate
column_types = None

records = json.loads(
data = json.loads(
gs.read_command(
"v.db.select",
map=input_name,
@@ -416,7 +421,9 @@ def aggregate_attributes_sql(
group=column,
format="json",
)
)["records"]
)
# We added the group column to the select, so we need to skip it here.
select_column_names = [item["name"] for item in data["info"]["columns"]][1:]
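# Sketch of the v.db.select JSON shape assumed here (column names and values are
# purely illustrative): data["info"]["columns"] is a list of dicts with a "name"
# key, e.g. [{"name": "city"}, {"name": "group_concat(pop, ',')"}], and
# data["records"] is a list of dicts keyed by those names,
# e.g. [{"city": "A", "group_concat(pop, ',')": "10,20"}].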
updates = []
add_columns = []
if column_types:
Expand All @@ -432,13 +439,13 @@ def aggregate_attributes_sql(
column_name, column_type = definition.split(" ", maxsplit=1)
result_columns.append(column_name)
column_types.append(column_type)
for row in records:
for row in data["records"]:
where = column_value_to_where(column, row[column], quote=quote_column)
for (
result_column,
column_type,
key,
) in zip(result_columns, column_types, select_columns):
) in zip(result_columns, column_types, select_column_names):
updates.append(
{
"column": result_column,
