
Commit

Support general SQL syntax just like v.db.select for the price of fewer checks. Now depends on v.db.select producing the list of column names #3090. Test and example included.
wenzeslaus committed Jul 18, 2023
1 parent 568b1cb commit 7cd9500
Showing 3 changed files with 87 additions and 10 deletions.
30 changes: 29 additions & 1 deletion scripts/v.dissolve/tests/v_dissolve_aggregate_test.py
@@ -245,7 +245,7 @@ def test_sqlite_agg_accepted(dataset):


def test_sqlite_concat(dataset):
"""SQLite concat text-returning aggregate function works"""
"""SQLite group concat text-returning aggregate function works"""
dissolved_vector = "test_sqlite_concat"
gs.run_command(
"v.dissolve",
@@ -271,6 +271,34 @@ def test_sqlite_concat(dataset):
assert sorted(expected.split(",")) == sorted(actual.split(","))


def test_sqlite_concat_with_two_parameters(dataset):
"""SQLite group concat text-returning two-parameter aggregate function works"""
dissolved_vector = "test_sqlite_concat_separator"
separator = "--+--"
gs.run_command(
"v.dissolve",
input=dataset.vector_name,
column=dataset.str_column_name,
output=dissolved_vector,
aggregate_column=f"group_concat({dataset.int_column_name}, '{separator}')",
result_column="concat_values text",
aggregate_backend="sql",
)
records = json.loads(
gs.read_command(
"v.db.select",
map=dissolved_vector,
format="json",
)
)["records"]
# The order of records is not guaranteed, so both lists are sorted.
# The order of values within group_concat output is defined as arbitrary by SQLite.
expected_integers = sorted(["10", "10,10,24", "5,5"])
actual_integers = sorted([record["concat_values"] for record in records])
for expected, actual in zip(expected_integers, actual_integers):
assert sorted(expected.split(",")) == sorted(actual.split(separator))


def test_duplicate_columns_and_methods_accepted(dataset):
"""Duplicate aggregate columns and methods are accepted and deduplicated"""
dissolved_vector = "test_duplicates"
48 changes: 45 additions & 3 deletions scripts/v.dissolve/v.dissolve.html
@@ -55,7 +55,7 @@ <h3>Attribute aggregation</h3>
<b>aggregate_columns</b> can contain SQL syntax specifying both columns
and the functions applied, e.g.,
<code>aggregate_columns="sum(cows) / sum(animals)"</code>.
In this case, <b>aggregate_methods</b> needs to be omitted.
In this case, <b>aggregate_methods</b> should be omitted.
This provides the highest flexibility and it is suitable for scripting.
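
<p>
For illustration, a call using this SQL syntax might look like the following
sketch; <code>cows</code> and <code>animals</code> are the columns from the example
above, while the input and output map names, the dissolve column, and the result
column are purely hypothetical:

<div class="code"><pre>
v.dissolve input=farms column=owner output=farms_dissolved \
    aggregate_columns="sum(cows) / sum(animals)" \
    result_columns="animal_ratio double precision"
</pre></div>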

<p>
@@ -168,7 +168,7 @@ <h3>Attribute aggregation</h3>
aggregate_columns=ACRES
</pre></div>

The above will create multiple columns for each of the statistics computed
To inspect the result, we will use <em>v.db.select</em> to retrieve only the row
for <code>DOTURBAN_N == 'Wadesboro'</code>:

<div class="code"><pre>
v.db.select municipalities where="DOTURBAN_N == 'Wadesboro'" separator=tab
</pre></div>

The resulting table may look like this:

<div class="code"><pre>
cat DOTURBAN_N ACRES_n ACRES_min ACRES_max ACRES_mean ACRES_sum
66 Wadesboro 2 634.987 3935.325 2285.156 4570.312
</pre></div>

The above created multiple columns for each of the statistics computed
by default. We can limit the number of statistics computed by specifying
the method which should be used:

@@ -221,7 +235,8 @@ <h3>Aggregating multiple attributes</h3>

While it is often not necessary to specify aggregate methods or names for
interactive exploratory analysis, specifying both <b>aggregate_methods</b>
and <b>result_columns</b> manually is a best practice for scripting.
and <b>result_columns</b> manually is a best practice for scripting
(unless SQL syntax is used for <b>aggregate_columns</b>, see below).
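
<p>
A sketch of such a fully specified call, assuming the <code>boundary_municp</code>
map used in the examples above (the output map name and the result column name
are only illustrative):

<div class="code"><pre>
v.dissolve input=boundary_municp column=DOTURBAN_N output=municipalities_sum \
    aggregate_columns=ACRES aggregate_methods=sum result_columns=acres_sum
</pre></div>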

<h3>Aggregating using SQL syntax</h3>

@@ -251,6 +266,33 @@ <h3>Aggregating using SQL syntax</h3>
Here, <em>v.dissolve</em> doesn't make any assumptions about the resulting
column types, so we specified both the name and the type of each column.

<p>
When working with the general SQL syntax, <em>v.dissolve</em> turns off its checks on the
number of aggregate and result columns so that any SQL syntax can be used for the
aggregate columns. This also allows functions with multiple parameters, for example
specifying the separator to be used with <em>group_concat</em>:

<div class="code"><pre>
v.dissolve input=boundary_municp column=DOTURBAN_N output=municipalities_7 \
aggregate_columns="group_concat(MB_NAME, ';')" \
result_columns="names TEXT"
</pre></div>

To inspect the result, we will use <em>v.db.select</em> to retrieve only the row
for <code>DOTURBAN_N == 'Wadesboro'</code>:

<div class="code"><pre>
v.db.select municipalities_7 where="DOTURBAN_N == 'Wadesboro'" separator=tab
</pre></div>

The resulting table may look like this:

<div class="code"><pre>
cat DOTURBAN_N names
66 Wadesboro Wadesboro;Lilesville
</pre></div>


<h2>SEE ALSO</h2>

<em>
19 changes: 13 additions & 6 deletions scripts/v.dissolve/v.dissolve.py
@@ -326,7 +326,12 @@ def create_or_check_result_columns_or_fatal(
methods=len(methods),
)
)
if len(result_columns) != len(columns_to_aggregate):
# When methods are not set with the sql backend, the columns may contain general
# SQL syntax which we cannot easily parse, so we do not check the numbers of
# columns here.
if (methods or backend != "sql") and len(result_columns) != len(
columns_to_aggregate
):
gs.fatal(
_(
"The number of result columns ({result_columns}) needs to be "
@@ -383,7 +388,7 @@ def aggregate_attributes_sql(
result_columns,
):
"""Aggregate values in selected columns grouped by column using SQL backend"""
if len(columns_to_aggregate) != len(result_columns):
if methods and len(columns_to_aggregate) != len(result_columns):
raise ValueError(
"Number of columns_to_aggregate and result_columns must be the same"
)
@@ -407,7 +412,7 @@ def aggregate_attributes_sql(
select_columns = columns_to_aggregate
column_types = None

records = json.loads(
data = json.loads(
gs.read_command(
"v.db.select",
map=input_name,
@@ -416,7 +421,9 @@ def aggregate_attributes_sql(
group=column,
format="json",
)
)["records"]
)
# We added the group column to the select, so we need to skip it here.
select_column_names = [item["name"] for item in data["info"]["columns"]][1:]
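# Sketch of the v.db.select JSON shape assumed here (column names and values are
# purely illustrative): data["info"]["columns"] is a list of dicts with a "name"
# key, e.g. [{"name": "city"}, {"name": "group_concat(pop, ',')"}], and
# data["records"] is a list of dicts keyed by those names,
# e.g. [{"city": "A", "group_concat(pop, ',')": "10,20"}].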
updates = []
add_columns = []
if column_types:
Expand All @@ -432,13 +439,13 @@ def aggregate_attributes_sql(
column_name, column_type = definition.split(" ", maxsplit=1)
result_columns.append(column_name)
column_types.append(column_type)
for row in records:
for row in data["records"]:
where = column_value_to_where(column, row[column], quote=quote_column)
for (
result_column,
column_type,
key,
) in zip(result_columns, column_types, select_columns):
) in zip(result_columns, column_types, select_column_names):
updates.append(
{
"column": result_column,
