From 9856df7e116efb943f5c9e1f0cc12435b860dbc3 Mon Sep 17 00:00:00 2001 From: Xiaofan Hu Date: Thu, 29 Aug 2024 18:57:52 +0800 Subject: [PATCH] fix: optimise list query in persist/sqldb/workflow_archive.go the current implementation appears to be causing postgres always unmarshalling workflow json payload for all the records in the table. by adopting a subquery approach, we are able to optimise the query from a runtime of 11495.734 ms to 44.713 ms. the data size is about 417481 argo_archived_workflows and 1794624 argo_archived_workflows_labels. this new change is backward compatible and it is tested on our prodction env (using postgresql). realated issue: https://github.com/argoproj/argo-workflows/issues/13295 query change example: previous: ```sql SELECT name, namespace, UID, phase, startedat, finishedat, coalesce((workflow::JSON)->'metadata'->>'labels', '{}') AS labels, coalesce((workflow::JSON)->'metadata'->>'annotations', '{}') AS annotations, coalesce((workflow::JSON)->'status'->>'progress', '') AS progress, coalesce((workflow::JSON)->'metadata'->>'creationTimestamp', '') AS creationtimestamp, (workflow::JSON)->'spec'->>'suspend' AS suspend, coalesce((workflow::JSON)->'status'->>'message', '') AS message, coalesce((workflow::JSON)->'status'->>'estimatedDuration', '0') AS estimatedduration, coalesce((workflow::JSON)->'status'->>'resourcesDuration', '{}') AS resourcesduration FROM "argo_archived_workflows" WHERE (("clustername" = 'default' AND "instanceid" = '') AND "namespace" = 'argo-map' AND EXISTS (SELECT 1 FROM argo_archived_workflows_labels WHERE clustername = argo_archived_workflows.clustername AND UID = argo_archived_workflows.uid AND name = 'workflows.argoproj.io/phase' AND value = 'Succeeded') AND EXISTS (SELECT 1 FROM argo_archived_workflows_labels WHERE clustername = argo_archived_workflows.clustername AND UID = argo_archived_workflows.uid AND name = 'workflows.argoproj.io/workflow-template' AND value = 'mapping1-pipeline-template-with-nfs')) ORDER BY "startedat" DESC LIMIT 1; ``` now: ```sql SELECT name, namespace, UID, phase, startedat, finishedat, coalesce((workflow::JSON)->'metadata'->>'labels', '{}') AS labels, coalesce((workflow::JSON)->'metadata'->>'annotations', '{}') AS annotations, coalesce((workflow::JSON)->'status'->>'progress', '') AS progress, coalesce((workflow::JSON)->'metadata'->>'creationTimestamp', '') AS creationtimestamp, (workflow::JSON)->'spec'->>'suspend' AS suspend, coalesce((workflow::JSON)->'status'->>'message', '') AS message, coalesce((workflow::JSON)->'status'->>'estimatedDuration', '0') AS estimatedduration, coalesce((workflow::JSON)->'status'->>'resourcesDuration', '{}') AS resourcesduration FROM "argo_archived_workflows" WHERE "clustername" = 'default' AND UID IN (SELECT UID FROM "argo_archived_workflows" WHERE (("clustername" = 'default' AND "instanceid" = '') AND "namespace" = 'argo-map' AND EXISTS (SELECT 1 FROM argo_archived_workflows_labels WHERE clustername = argo_archived_workflows.clustername AND UID = argo_archived_workflows.uid AND name = 'workflows.argoproj.io/phase' AND value = 'Succeeded') AND EXISTS (SELECT 1 FROM argo_archived_workflows_labels WHERE clustername = argo_archived_workflows.clustername AND UID = argo_archived_workflows.uid AND name = 'workflows.argoproj.io/workflow-template' AND value = 'mapping1-pipeline-template-with-nfs')) ORDER BY "startedat" DESC LIMIT 1); ``` Signed-off-by: Xiaofan Hu --- persist/sqldb/workflow_archive.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/persist/sqldb/workflow_archive.go b/persist/sqldb/workflow_archive.go index c18c2017b369..29da4cb7fd60 100644 --- a/persist/sqldb/workflow_archive.go +++ b/persist/sqldb/workflow_archive.go @@ -163,16 +163,20 @@ func (r *workflowArchive) ListWorkflows(options sutils.ListOptions) (wfv1.Workfl return nil, err } - selector := r.session.SQL(). - Select(selectQuery). + subSelector := r.session.SQL(). + Select(db.Raw("uid")). From(archiveTableName). Where(r.clusterManagedNamespaceAndInstanceID()) - selector, err = BuildArchivedWorkflowSelector(selector, archiveTableName, archiveLabelsTableName, r.dbType, options, false) + subSelector, err = BuildArchivedWorkflowSelector(subSelector, archiveTableName, archiveLabelsTableName, r.dbType, options, false) if err != nil { return nil, err } + selector := r.session.SQL().Select(selectQuery).From(archiveTableName).Where( + r.clusterManagedNamespaceAndInstanceID().And(db.Cond{"uid IN": subSelector}), + ) + err = selector.All(&archivedWfs) if err != nil { return nil, err