From 9856df7e116efb943f5c9e1f0cc12435b860dbc3 Mon Sep 17 00:00:00 2001
From: Xiaofan Hu <xiaofanhu@deeproute.ai>
Date: Thu, 29 Aug 2024 18:57:52 +0800
Subject: [PATCH] fix: optimise list query in persist/sqldb/workflow_archive.go

the current implementation appears to be causing postgres always unmarshalling workflow json
payload for all the records in the table. by adopting a subquery approach, we are able to optimise
the query from a runtime of 11495.734 ms to 44.713 ms. the data size is about 417481 argo_archived_workflows
and 1794624 argo_archived_workflows_labels.

this new change is backward compatible and it is tested on our prodction env (using postgresql).

realated issue: https://github.com/argoproj/argo-workflows/issues/13295

query change example:

previous:

```sql
SELECT name,
       namespace,
       UID,
       phase,
       startedat,
       finishedat,
       coalesce((workflow::JSON)->'metadata'->>'labels', '{}') AS labels,
       coalesce((workflow::JSON)->'metadata'->>'annotations', '{}') AS annotations,
       coalesce((workflow::JSON)->'status'->>'progress', '') AS progress,
       coalesce((workflow::JSON)->'metadata'->>'creationTimestamp', '') AS creationtimestamp,
       (workflow::JSON)->'spec'->>'suspend' AS suspend,
       coalesce((workflow::JSON)->'status'->>'message', '') AS message,
       coalesce((workflow::JSON)->'status'->>'estimatedDuration', '0') AS estimatedduration,
       coalesce((workflow::JSON)->'status'->>'resourcesDuration', '{}') AS resourcesduration
FROM "argo_archived_workflows"
WHERE (("clustername" = 'default'
        AND "instanceid" = '')
       AND "namespace" = 'argo-map'
       AND EXISTS
         (SELECT 1
          FROM argo_archived_workflows_labels
          WHERE clustername = argo_archived_workflows.clustername
            AND UID = argo_archived_workflows.uid
            AND name = 'workflows.argoproj.io/phase'
            AND value = 'Succeeded')
       AND EXISTS
         (SELECT 1
          FROM argo_archived_workflows_labels
          WHERE clustername = argo_archived_workflows.clustername
            AND UID = argo_archived_workflows.uid
            AND name = 'workflows.argoproj.io/workflow-template'
            AND value = 'mapping1-pipeline-template-with-nfs'))
ORDER BY "startedat" DESC
LIMIT 1;
```

now:

```sql
SELECT name,
       namespace,
       UID,
       phase,
       startedat,
       finishedat,
       coalesce((workflow::JSON)->'metadata'->>'labels', '{}') AS labels,
       coalesce((workflow::JSON)->'metadata'->>'annotations', '{}') AS annotations,
       coalesce((workflow::JSON)->'status'->>'progress', '') AS progress,
       coalesce((workflow::JSON)->'metadata'->>'creationTimestamp', '') AS creationtimestamp,
       (workflow::JSON)->'spec'->>'suspend' AS suspend,
       coalesce((workflow::JSON)->'status'->>'message', '') AS message,
       coalesce((workflow::JSON)->'status'->>'estimatedDuration', '0') AS estimatedduration,
       coalesce((workflow::JSON)->'status'->>'resourcesDuration', '{}') AS resourcesduration
FROM "argo_archived_workflows"
WHERE "clustername" = 'default'
  AND UID IN
    (SELECT UID
     FROM "argo_archived_workflows"
     WHERE (("clustername" = 'default'
             AND "instanceid" = '')
            AND "namespace" = 'argo-map'
            AND EXISTS
              (SELECT 1
               FROM argo_archived_workflows_labels
               WHERE clustername = argo_archived_workflows.clustername
                 AND UID = argo_archived_workflows.uid
                 AND name = 'workflows.argoproj.io/phase'
                 AND value = 'Succeeded')
            AND EXISTS
              (SELECT 1
               FROM argo_archived_workflows_labels
               WHERE clustername = argo_archived_workflows.clustername
                 AND UID = argo_archived_workflows.uid
                 AND name = 'workflows.argoproj.io/workflow-template'
                 AND value = 'mapping1-pipeline-template-with-nfs'))
     ORDER BY "startedat" DESC
     LIMIT 1);
```

Signed-off-by: Xiaofan Hu <bom.d.van@gmail.com>
---
 persist/sqldb/workflow_archive.go | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/persist/sqldb/workflow_archive.go b/persist/sqldb/workflow_archive.go
index c18c2017b369..29da4cb7fd60 100644
--- a/persist/sqldb/workflow_archive.go
+++ b/persist/sqldb/workflow_archive.go
@@ -163,16 +163,20 @@ func (r *workflowArchive) ListWorkflows(options sutils.ListOptions) (wfv1.Workfl
 		return nil, err
 	}
 
-	selector := r.session.SQL().
-		Select(selectQuery).
+	subSelector := r.session.SQL().
+		Select(db.Raw("uid")).
 		From(archiveTableName).
 		Where(r.clusterManagedNamespaceAndInstanceID())
 
-	selector, err = BuildArchivedWorkflowSelector(selector, archiveTableName, archiveLabelsTableName, r.dbType, options, false)
+	subSelector, err = BuildArchivedWorkflowSelector(subSelector, archiveTableName, archiveLabelsTableName, r.dbType, options, false)
 	if err != nil {
 		return nil, err
 	}
 
+	selector := r.session.SQL().Select(selectQuery).From(archiveTableName).Where(
+		r.clusterManagedNamespaceAndInstanceID().And(db.Cond{"uid IN": subSelector}),
+	)
+
 	err = selector.All(&archivedWfs)
 	if err != nil {
 		return nil, err