fix: optimise list query in persist/sqldb/workflow_archive.go · bom-d-van/argo-workflows@9856df7

Commit

fix: optimise list query in persist/sqldb/workflow_archive.go

the current implementation appears to be causing postgres always unmarshalling workflow json
payload for all the records in the table. by adopting a subquery approach, we are able to optimise
the query from a runtime of 11495.734 ms to 44.713 ms. the data size is about 417481 argo_archived_workflows
and 1794624 argo_archived_workflows_labels.

this new change is backward compatible and it is tested on our prodction env (using postgresql).

realated issue: argoproj#13295

query change example:

previous:

```sql
SELECT name,
       namespace,
       UID,
       phase,
       startedat,
       finishedat,
       coalesce((workflow::JSON)->'metadata'->>'labels', '{}') AS labels,
       coalesce((workflow::JSON)->'metadata'->>'annotations', '{}') AS annotations,
       coalesce((workflow::JSON)->'status'->>'progress', '') AS progress,
       coalesce((workflow::JSON)->'metadata'->>'creationTimestamp', '') AS creationtimestamp,
       (workflow::JSON)->'spec'->>'suspend' AS suspend,
       coalesce((workflow::JSON)->'status'->>'message', '') AS message,
       coalesce((workflow::JSON)->'status'->>'estimatedDuration', '0') AS estimatedduration,
       coalesce((workflow::JSON)->'status'->>'resourcesDuration', '{}') AS resourcesduration
FROM "argo_archived_workflows"
WHERE (("clustername" = 'default'
        AND "instanceid" = '')
       AND "namespace" = 'argo-map'
       AND EXISTS
         (SELECT 1
          FROM argo_archived_workflows_labels
          WHERE clustername = argo_archived_workflows.clustername
            AND UID = argo_archived_workflows.uid
            AND name = 'workflows.argoproj.io/phase'
            AND value = 'Succeeded')
       AND EXISTS
         (SELECT 1
          FROM argo_archived_workflows_labels
          WHERE clustername = argo_archived_workflows.clustername
            AND UID = argo_archived_workflows.uid
            AND name = 'workflows.argoproj.io/workflow-template'
            AND value = 'mapping1-pipeline-template-with-nfs'))
ORDER BY "startedat" DESC
LIMIT 1;
```

now:

```sql
SELECT name,
       namespace,
       UID,
       phase,
       startedat,
       finishedat,
       coalesce((workflow::JSON)->'metadata'->>'labels', '{}') AS labels,
       coalesce((workflow::JSON)->'metadata'->>'annotations', '{}') AS annotations,
       coalesce((workflow::JSON)->'status'->>'progress', '') AS progress,
       coalesce((workflow::JSON)->'metadata'->>'creationTimestamp', '') AS creationtimestamp,
       (workflow::JSON)->'spec'->>'suspend' AS suspend,
       coalesce((workflow::JSON)->'status'->>'message', '') AS message,
       coalesce((workflow::JSON)->'status'->>'estimatedDuration', '0') AS estimatedduration,
       coalesce((workflow::JSON)->'status'->>'resourcesDuration', '{}') AS resourcesduration
FROM "argo_archived_workflows"
WHERE "clustername" = 'default'
  AND UID IN
    (SELECT UID
     FROM "argo_archived_workflows"
     WHERE (("clustername" = 'default'
             AND "instanceid" = '')
            AND "namespace" = 'argo-map'
            AND EXISTS
              (SELECT 1
               FROM argo_archived_workflows_labels
               WHERE clustername = argo_archived_workflows.clustername
                 AND UID = argo_archived_workflows.uid
                 AND name = 'workflows.argoproj.io/phase'
                 AND value = 'Succeeded')
            AND EXISTS
              (SELECT 1
               FROM argo_archived_workflows_labels
               WHERE clustername = argo_archived_workflows.clustername
                 AND UID = argo_archived_workflows.uid
                 AND name = 'workflows.argoproj.io/workflow-template'
                 AND value = 'mapping1-pipeline-template-with-nfs'))
     ORDER BY "startedat" DESC
     LIMIT 1);
```

Signed-off-by: Xiaofan Hu <[email protected]>

Loading branch information

Xiaofan Hu authored and bom-d-van committed Sep 6, 2024

1 parent 7173a27 commit 9856df7

persist/sqldb/workflow_archive.go

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -163,16 +163,20 @@ func (r *workflowArchive) ListWorkflows(options sutils.ListOptions) (wfv1.Workfl
  
    		return nil, err

    	}

    	selector := r.session.SQL().

    		Select(selectQuery).

    	subSelector := r.session.SQL().

    		Select(db.Raw("uid")).

    		From(archiveTableName).

    		Where(r.clusterManagedNamespaceAndInstanceID())

    	selector, err = BuildArchivedWorkflowSelector(selector, archiveTableName, archiveLabelsTableName, r.dbType, options, false)

    	subSelector, err = BuildArchivedWorkflowSelector(subSelector, archiveTableName, archiveLabelsTableName, r.dbType, options, false)

    	if err != nil {

    		return nil, err

    	}

    	selector := r.session.SQL().Select(selectQuery).From(archiveTableName).Where(

    		r.clusterManagedNamespaceAndInstanceID().And(db.Cond{"uid IN": subSelector}),

    	)

    	err = selector.All(&archivedWfs)

    	if err != nil {

    		return nil, err

0 comments on commit `9856df7`

Please sign in to comment.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Commit

There are no files selected for viewing

0 comments on commit `9856df7`

Commit

There are no files selected for viewing

0 comments on commit 9856df7

0 comments on commit `9856df7`