Skip to content

Commit

Permalink
fix: optimise list query in persist/sqldb/workflow_archive.go
Browse files Browse the repository at this point in the history
the current implementation appears to be causing postgres always unmarshalling workflow json
payload for all the records in the table. by adopting a subquery approach, we are able to optimise
the query from a runtime of 11495.734 ms to 44.713 ms. the data size is about 417481 argo_archived_workflows
and 1794624 argo_archived_workflows_labels.

this new change is backward compatible and it is tested on our prodction env (using postgresql).

realated issue: argoproj#13295

query change example:

previous:

```sql
SELECT name,
       namespace,
       UID,
       phase,
       startedat,
       finishedat,
       coalesce((workflow::JSON)->'metadata'->>'labels', '{}') AS labels,
       coalesce((workflow::JSON)->'metadata'->>'annotations', '{}') AS annotations,
       coalesce((workflow::JSON)->'status'->>'progress', '') AS progress,
       coalesce((workflow::JSON)->'metadata'->>'creationTimestamp', '') AS creationtimestamp,
       (workflow::JSON)->'spec'->>'suspend' AS suspend,
       coalesce((workflow::JSON)->'status'->>'message', '') AS message,
       coalesce((workflow::JSON)->'status'->>'estimatedDuration', '0') AS estimatedduration,
       coalesce((workflow::JSON)->'status'->>'resourcesDuration', '{}') AS resourcesduration
FROM "argo_archived_workflows"
WHERE (("clustername" = 'default'
        AND "instanceid" = '')
       AND "namespace" = 'argo-map'
       AND EXISTS
         (SELECT 1
          FROM argo_archived_workflows_labels
          WHERE clustername = argo_archived_workflows.clustername
            AND UID = argo_archived_workflows.uid
            AND name = 'workflows.argoproj.io/phase'
            AND value = 'Succeeded')
       AND EXISTS
         (SELECT 1
          FROM argo_archived_workflows_labels
          WHERE clustername = argo_archived_workflows.clustername
            AND UID = argo_archived_workflows.uid
            AND name = 'workflows.argoproj.io/workflow-template'
            AND value = 'mapping1-pipeline-template-with-nfs'))
ORDER BY "startedat" DESC
LIMIT 1;
```

now:

```sql
SELECT name,
       namespace,
       UID,
       phase,
       startedat,
       finishedat,
       coalesce((workflow::JSON)->'metadata'->>'labels', '{}') AS labels,
       coalesce((workflow::JSON)->'metadata'->>'annotations', '{}') AS annotations,
       coalesce((workflow::JSON)->'status'->>'progress', '') AS progress,
       coalesce((workflow::JSON)->'metadata'->>'creationTimestamp', '') AS creationtimestamp,
       (workflow::JSON)->'spec'->>'suspend' AS suspend,
       coalesce((workflow::JSON)->'status'->>'message', '') AS message,
       coalesce((workflow::JSON)->'status'->>'estimatedDuration', '0') AS estimatedduration,
       coalesce((workflow::JSON)->'status'->>'resourcesDuration', '{}') AS resourcesduration
FROM "argo_archived_workflows"
WHERE "clustername" = 'default'
  AND UID IN
    (SELECT UID
     FROM "argo_archived_workflows"
     WHERE (("clustername" = 'default'
             AND "instanceid" = '')
            AND "namespace" = 'argo-map'
            AND EXISTS
              (SELECT 1
               FROM argo_archived_workflows_labels
               WHERE clustername = argo_archived_workflows.clustername
                 AND UID = argo_archived_workflows.uid
                 AND name = 'workflows.argoproj.io/phase'
                 AND value = 'Succeeded')
            AND EXISTS
              (SELECT 1
               FROM argo_archived_workflows_labels
               WHERE clustername = argo_archived_workflows.clustername
                 AND UID = argo_archived_workflows.uid
                 AND name = 'workflows.argoproj.io/workflow-template'
                 AND value = 'mapping1-pipeline-template-with-nfs'))
     ORDER BY "startedat" DESC
     LIMIT 1);
```

Signed-off-by: Xiaofan Hu <[email protected]>
  • Loading branch information
Xiaofan Hu authored and bom-d-van committed Sep 6, 2024
1 parent 7173a27 commit 9856df7
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions persist/sqldb/workflow_archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,16 +163,20 @@ func (r *workflowArchive) ListWorkflows(options sutils.ListOptions) (wfv1.Workfl
return nil, err
}

selector := r.session.SQL().
Select(selectQuery).
subSelector := r.session.SQL().
Select(db.Raw("uid")).
From(archiveTableName).
Where(r.clusterManagedNamespaceAndInstanceID())

selector, err = BuildArchivedWorkflowSelector(selector, archiveTableName, archiveLabelsTableName, r.dbType, options, false)
subSelector, err = BuildArchivedWorkflowSelector(subSelector, archiveTableName, archiveLabelsTableName, r.dbType, options, false)
if err != nil {
return nil, err
}

selector := r.session.SQL().Select(selectQuery).From(archiveTableName).Where(
r.clusterManagedNamespaceAndInstanceID().And(db.Cond{"uid IN": subSelector}),
)

err = selector.All(&archivedWfs)
if err != nil {
return nil, err
Expand Down

0 comments on commit 9856df7

Please sign in to comment.