From ded340966ea968e2dcf2eaa41df9ed0bdcac713e Mon Sep 17 00:00:00 2001 From: Mason Malone <651224+MasonM@users.noreply.github.com> Date: Thu, 17 Oct 2024 09:45:57 -0700 Subject: [PATCH 1/3] fix: remove JSON cast when querying archived workflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With PostgreSQL, the `argo_archived_workflows.workflow` column has been of type `json` ever since 8a1e611a03da8374567c9654f8baf29b66c83c6e, which was released as v2.5.0. Therefore, the `::json` casts do nothing, and prevent users from improving performance by migrating to JSONB using the following query: ```sql alter table argo_archived_workflows alter column workflow set data type jsonb using workflow::jsonb ``` Without the changes in this PR, running the above will massively slow down the queries, because casting JSONB to JSON is expensive. With the changes in this PR, it improves performance by ~80%, which I determined by running the benchmarks added in https://github.com/argoproj/argo-workflows/pull/13767 against a DB with 100,000 randomly generated workflows generated by https://github.com/argoproj/argo-workflows/pull/13715: ``` $ benchstat postgres_before_10000_workflows.txt postgres_after_10000_workflows.txt goos: linux goarch: amd64 pkg: github.com/argoproj/argo-workflows/v3/test/e2e cpu: 12th Gen Intel(R) Core(TM) i5-12400 │ postgres_before_10000_workflows.txt │ postgres_after_10000_workflows.txt │ │ sec/op │ sec/op vs base │ WorkflowArchive/ListWorkflows-12 185.81m ± ∞ ¹ 25.06m ± ∞ ¹ ~ (p=1.000 n=1) ² WorkflowArchive/ListWorkflows_with_label_selector-12 186.35m ± ∞ ¹ 25.99m ± ∞ ¹ ~ (p=1.000 n=1) ² WorkflowArchive/CountWorkflows-12 42.39m ± ∞ ¹ 11.78m ± ∞ ¹ ~ (p=1.000 n=1) ² geomean 113.6m 19.72m -82.64% ¹ need >= 6 samples for confidence interval at level 0.95 ² need >= 4 samples to detect a difference at alpha level 0.05 ``` The only downside to migrating to JSONB is it can take a long time if you've got a ton of workflows (~72s on my local DB with 100,000 workflows). I'll enter a separate PR for the migration, but I'm entering this change separately so it can hopefully go out in 3.6.0. Signed-off-by: Mason Malone <651224+MasonM@users.noreply.github.com> --- persist/sqldb/workflow_archive.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/persist/sqldb/workflow_archive.go b/persist/sqldb/workflow_archive.go index 4520297889d1..7371fb37bedd 100644 --- a/persist/sqldb/workflow_archive.go +++ b/persist/sqldb/workflow_archive.go @@ -452,7 +452,7 @@ func selectArchivedWorkflowQuery(t dbType) (*db.RawExpr, error) { case MySQL: return db.Raw("name, namespace, uid, phase, startedat, finishedat, coalesce(JSON_EXTRACT(workflow,'$.metadata.labels'), '{}') as labels,coalesce(JSON_EXTRACT(workflow,'$.metadata.annotations'), '{}') as annotations, coalesce(JSON_UNQUOTE(JSON_EXTRACT(workflow,'$.status.progress')), '') as progress, coalesce(JSON_UNQUOTE(JSON_EXTRACT(workflow,'$.metadata.creationTimestamp')), '') as creationtimestamp, JSON_UNQUOTE(JSON_EXTRACT(workflow,'$.spec.suspend')) as suspend, coalesce(JSON_UNQUOTE(JSON_EXTRACT(workflow,'$.status.message')), '') as message, coalesce(JSON_UNQUOTE(JSON_EXTRACT(workflow,'$.status.estimatedDuration')), '0') as estimatedduration, coalesce(JSON_EXTRACT(workflow,'$.status.resourcesDuration'), '{}') as resourcesduration"), nil case Postgres: - return db.Raw("name, namespace, uid, phase, startedat, finishedat, coalesce((workflow::json)->'metadata'->>'labels', '{}') as labels, coalesce((workflow::json)->'metadata'->>'annotations', '{}') as annotations, coalesce((workflow::json)->'status'->>'progress', '') as progress, coalesce((workflow::json)->'metadata'->>'creationTimestamp', '') as creationtimestamp, (workflow::json)->'spec'->>'suspend' as suspend, coalesce((workflow::json)->'status'->>'message', '') as message, coalesce((workflow::json)->'status'->>'estimatedDuration', '0') as estimatedduration, coalesce((workflow::json)->'status'->>'resourcesDuration', '{}') as resourcesduration"), nil + return db.Raw("name, namespace, uid, phase, startedat, finishedat, coalesce(workflow->'metadata'->>'labels', '{}') as labels, coalesce(workflow->'metadata'->>'annotations', '{}') as annotations, coalesce(workflow->'status'->>'progress', '') as progress, coalesce(workflow->'metadata'->>'creationTimestamp', '') as creationtimestamp, workflow->'spec'->>'suspend' as suspend, coalesce(workflow->'status'->>'message', '') as message, coalesce(workflow->'status'->>'estimatedDuration', '0') as estimatedduration, coalesce(workflow->'status'->>'resourcesDuration', '{}') as resourcesduration"), nil } return nil, fmt.Errorf("unsupported db type %s", t) } From 77510ad5680b9a2d9cab9b3d660385e4944888a9 Mon Sep 17 00:00:00 2001 From: Mason Malone <651224+MasonM@users.noreply.github.com> Date: Thu, 17 Oct 2024 11:11:12 -0700 Subject: [PATCH 2/3] fix: migrate argo_archived_workflows.workflow to JSONB. Fixes #13601 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As explained in https://github.com/argoproj/argo-workflows/issues/13601#issuecomment-2420499551, I believe https://github.com/argoproj/argo-workflows/pull/12912 introduced a performance regression when listing workflows for PostgreSQL users. Reverting that PR could re-introduce the memory issues mentioned in the PR description, so instead this mitigates the impact by converting the `workflow` column to be of type `jsonb`. Initially `workflow` was of type `text`, and was migrated to `json` in https://github.com/argoproj/argo-workflows/pull/2152. I'm not sure why `jsonb` wasn't chosen, but [based on this comment in the linked issue](https://github.com/argoproj/argo-workflows/issues/2133#issuecomment-583372400), I think it was simply an oversight. Here's the relevant docs (https://www.postgresql.org/docs/current/datatype-json.html): > The json and jsonb data types accept almost identical sets of values as input. The major practical difference is one of efficiency. The json data type stores an exact copy of the input text, which processing functions must reparse on each execution; while jsonb data is stored in a decomposed binary format that makes it slightly slower to input due to added conversion overhead, but significantly faster to process, since no reparsing is needed. jsonb also supports indexing, which can be a significant advantage. > > Because the json type stores an exact copy of the input text, it will preserve semantically-insignificant white space between tokens, as well as the order of keys within JSON objects. Also, if a JSON object within the value contains the same key more than once, all the key/value pairs are kept. (The processing functions consider the last value as the operative one.) By contrast, jsonb does not preserve white space, does not preserve the order of object keys, and does not keep duplicate object keys. If duplicate keys are specified in the input, only the last value is kept. > > In general, most applications should prefer to store JSON data as jsonb, unless there are quite specialized needs, such as legacy assumptions about ordering of object keys. I'm pretty sure we don't care about key order or whitespace. We do care somewhat about insertion speed, but archived workflows are read much more frequently than written, so a slight reduction in write speed that gives a large improvement in read speed is a good tradeoff. Here's steps to test this: 1. Use https://github.com/argoproj/argo-workflows/pull/13715 to generate 100,000 randomized workflows, with https://gist.github.com/MasonM/52932ff6644c3c0ccea9e847780bfd90 as a template: ``` $ time go run ./hack/db fake-archived-workflows --template "@very-large-workflow.yaml" --rows 100000 Using seed 1935828722624432788 Clusters: [default] Namespaces: [argo] Inserted 100000 rows real 18m35.316s user 3m2.447s sys 0m44.972s ``` 2. Run the benchmarks using https://github.com/argoproj/argo-workflows/pull/13767: ``` make BenchmarkWorkflowArchive > postgres_before_10000_workflows.txt ``` 3. Run the migration the DB CLI: ``` $ time go run ./hack/db migrate INFO[0000] Migrating database schema clusterName=default dbType=postgres INFO[0000] applying database change change="alter table argo_archived_workflows alter column workflow set data type jsonb using workflow::jsonb" changeSchemaVersion=60 2024/10/17 18:07:42 Session ID: 00001 Query: alter table argo_archived_workflows alter column workflow set data type jsonb using workflow::jsonb Stack: fmt.(*pp).handleMethods@/usr/local/go/src/fmt/print.go:673 fmt.(*pp).printArg@/usr/local/go/src/fmt/print.go:756 fmt.(*pp).doPrint@/usr/local/go/src/fmt/print.go:1208 fmt.Append@/usr/local/go/src/fmt/print.go:289 log.(*Logger).Print.func1@/usr/local/go/src/log/log.go:261 log.(*Logger).output@/usr/local/go/src/log/log.go:238 log.(*Logger).Print@/usr/local/go/src/log/log.go:260 github.com/argoproj/argo-workflows/v3/persist/sqldb.ansiSQLChange.apply@/home/vscode/go/src/github.com/argoproj/argo-workflows/persist/sqldb/ansi_sql_change.go:11 github.com/argoproj/argo-workflows/v3/persist/sqldb.migrate.applyChange.func1@/home/vscode/go/src/github.com/argoproj/argo-workflows/persist/sqldb/migrate.go:295 github.com/argoproj/argo-workflows/v3/persist/sqldb.migrate.applyChange@/home/vscode/go/src/github.com/argoproj/argo-workflows/persist/sqldb/migrate.go:284 github.com/argoproj/argo-workflows/v3/persist/sqldb.migrate.Exec@/home/vscode/go/src/github.com/argoproj/argo-workflows/persist/sqldb/migrate.go:273 main.NewMigrateCommand.func1@/home/vscode/go/src/github.com/argoproj/argo-workflows/hack/db/main.go:50 github.com/spf13/cobra.(*Command).execute@/home/vscode/go/pkg/mod/github.com/spf13/cobra@v1.8.1/command.go:985 github.com/spf13/cobra.(*Command).ExecuteC@/home/vscode/go/pkg/mod/github.com/spf13/cobra@v1.8.1/command.go:1117 github.com/spf13/cobra.(*Command).Execute@/home/vscode/go/pkg/mod/github.com/spf13/cobra@v1.8.1/command.go:1041 main.main@/home/vscode/go/src/github.com/argoproj/argo-workflows/hack/db/main.go:39 runtime.main@/usr/local/go/src/runtime/proc.go:272 runtime.goexit@/usr/local/go/src/runtime/asm_amd64.s:1700 Rows affected: 0 Error: upper: slow query Time taken: 69.12755s Context: context.Background real 1m10.087s user 0m1.541s sys 0m0.410s ``` 2. Re-run the benchmarks: ``` make BenchmarkWorkflowArchive > postgres_after_10000_workflows.txt ``` 4. Compare results using [benchstat](https://pkg.go.dev/golang.org/x/perf/cmd/benchstat): ``` $ benchstat postgres_before_10000_workflows3.txt postgres_after_10000_workflows2.txt goos: linux goarch: amd64 pkg: github.com/argoproj/argo-workflows/v3/test/e2e cpu: 12th Gen Intel(R) Core(TM) i5-12400 │ postgres_before_10000_workflows3.txt │ postgres_after_10000_workflows2.txt │ │ sec/op │ sec/op vs base │ WorkflowArchive/ListWorkflows-12 183.83m ± ∞ ¹ 24.69m ± ∞ ¹ ~ (p=1.000 n=1) ² WorkflowArchive/ListWorkflows_with_label_selector-12 192.71m ± ∞ ¹ 25.87m ± ∞ ¹ ~ (p=1.000 n=1) ² WorkflowArchive/CountWorkflows-12 13.04m ± ∞ ¹ 11.75m ± ∞ ¹ ~ (p=1.000 n=1) ² geomean 77.31m 19.58m -74.68% ¹ need >= 6 samples for confidence interval at level 0.95 ² need >= 4 samples to detect a difference at alpha level 0.05 │ postgres_before_10000_workflows3.txt │ postgres_after_10000_workflows2.txt │ │ B/op │ B/op vs base │ WorkflowArchive/ListWorkflows-12 497.2Ki ± ∞ ¹ 497.5Ki ± ∞ ¹ ~ (p=1.000 n=1) ² WorkflowArchive/ListWorkflows_with_label_selector-12 503.1Ki ± ∞ ¹ 503.9Ki ± ∞ ¹ ~ (p=1.000 n=1) ² WorkflowArchive/CountWorkflows-12 8.972Ki ± ∞ ¹ 8.899Ki ± ∞ ¹ ~ (p=1.000 n=1) ² geomean 130.9Ki 130.7Ki -0.20% ¹ need >= 6 samples for confidence interval at level 0.95 ² need >= 4 samples to detect a difference at alpha level 0.05 │ postgres_before_10000_workflows3.txt │ postgres_after_10000_workflows2.txt │ │ allocs/op │ allocs/op vs base │ WorkflowArchive/ListWorkflows-12 8.373k ± ∞ ¹ 8.370k ± ∞ ¹ ~ (p=1.000 n=1) ² WorkflowArchive/ListWorkflows_with_label_selector-12 8.410k ± ∞ ¹ 8.406k ± ∞ ¹ ~ (p=1.000 n=1) ² WorkflowArchive/CountWorkflows-12 212.0 ± ∞ ¹ 212.0 ± ∞ ¹ ~ (p=1.000 n=1) ³ geomean 2.462k 2.462k -0.03% ¹ need >= 6 samples for confidence interval at level 0.95 ² need >= 4 samples to detect a difference at alpha level 0.05 ³ all samples are equal ``` Signed-off-by: Mason Malone <651224+MasonM@users.noreply.github.com> --- persist/sqldb/migrate.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/persist/sqldb/migrate.go b/persist/sqldb/migrate.go index 45c908cc87de..193b20ccb887 100644 --- a/persist/sqldb/migrate.go +++ b/persist/sqldb/migrate.go @@ -25,6 +25,12 @@ type change interface { apply(session db.Session) error } +type noop struct{} + +func (s noop) apply(session db.Session) error { + return nil +} + func ternary(condition bool, left, right change) change { if condition { return left @@ -258,6 +264,11 @@ func (m migrate) Exec(ctx context.Context) (err error) { // add indexes for list archived workflow performance. #8836 ansiSQLChange(`create index argo_archived_workflows_i4 on argo_archived_workflows (startedat)`), ansiSQLChange(`create index argo_archived_workflows_labels_i1 on argo_archived_workflows_labels (name,value)`), + // PostgreSQL only: convert argo_archived_workflows.workflow column to JSONB for performance. #13601 + ternary(dbType == MySQL, + noop{}, + ansiSQLChange(`alter table argo_archived_workflows alter column workflow set data type jsonb using workflow::jsonb`), + ), } { err := m.applyChange(changeSchemaVersion, change) if err != nil { From f25960ac6f63bc3cf1a3b8a9813cf43c61921262 Mon Sep 17 00:00:00 2001 From: Mason Malone <651224+MasonM@users.noreply.github.com> Date: Fri, 18 Oct 2024 18:07:21 -0700 Subject: [PATCH 3/3] fix: minor adjustments to comment Signed-off-by: Mason Malone <651224+MasonM@users.noreply.github.com> --- persist/sqldb/migrate.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/persist/sqldb/migrate.go b/persist/sqldb/migrate.go index 193b20ccb887..633dfd2ff31a 100644 --- a/persist/sqldb/migrate.go +++ b/persist/sqldb/migrate.go @@ -264,7 +264,7 @@ func (m migrate) Exec(ctx context.Context) (err error) { // add indexes for list archived workflow performance. #8836 ansiSQLChange(`create index argo_archived_workflows_i4 on argo_archived_workflows (startedat)`), ansiSQLChange(`create index argo_archived_workflows_labels_i1 on argo_archived_workflows_labels (name,value)`), - // PostgreSQL only: convert argo_archived_workflows.workflow column to JSONB for performance. #13601 + // PostgreSQL only: convert argo_archived_workflows.workflow column to JSONB for performance and consistency with MySQL. #13779 ternary(dbType == MySQL, noop{}, ansiSQLChange(`alter table argo_archived_workflows alter column workflow set data type jsonb using workflow::jsonb`),