From d16ac84cbce947217334f7e9f12a3faab8331863 Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Sat, 10 Aug 2024 18:28:10 +0200 Subject: [PATCH 01/14] Dustapps sync script --- front/admin/copy_apps.sh | 87 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100755 front/admin/copy_apps.sh diff --git a/front/admin/copy_apps.sh b/front/admin/copy_apps.sh new file mode 100755 index 000000000000..d79e8aa6a40c --- /dev/null +++ b/front/admin/copy_apps.sh @@ -0,0 +1,87 @@ +#!/bin/bash +TARGET_WORKSPACE_ID=1 +FRONT_DATABASE_URI="postgres://dev:xxx@localhost/dust_front" +CORE_DATABASE_URI="postgres://dev:xxx@localhost/dust_api" + +echo "Will copy apps into workspace ${TARGET_WORKSPACE_ID}..." +echo "You'll have to manually update front/lib/api/config.ts to use localhost:3000 instead of dust.tt," +echo "and front/lib/development.ts / types/src/front/lib/actions/registry.ts to set your workspace sId in PRODUCTION_DUST_APPS_WORKSPACE_ID" +echo "Ensure you have valid env variables for DUST_MANAGED_ANTHROPIC_API_KEY, DUST_MANAGED_SERP_API_KEY and DUST_MANAGED_BROWSERLESS_API_KEY." +set -e + +echo "Fetching prodbox pod..." +PRODBOX_POD_NAME=$(kubectl get pods |grep prodbox |cut -d \ -f1) + +function escaped_columns_list { + echo $* | sed -E 's/ /,/g'| sed -E 's/([a-zA-Z_]+)/\\"\1\\"/g' +} +function columns_list { + echo $* | sed -E 's/ /,/g'| sed -E 's/([a-zA-Z_]+)/"\1"/g' +} +function updates_clause { + echo $* | sed -E 's/ /,/g'| sed -E 's/([a-zA-Z_]+)/"\1"=__copy."\1"/g' +} +function copy_clause { + echo $* | sed -E 's/ /,/g'| sed -E 's/([a-zA-Z_]+)/__copy."\1"/g' +} +function fetch { + database_uri=${1} + table_name=${2} + cols_to_fetch=${3} + where_clause=${4} + + echo "Fetching ${table_name} from ${PRODBOX_POD_NAME}..." + kubectl exec -it ${PRODBOX_POD_NAME} -- bash -c "psql \$${database_uri}_DATABASE_URI -c \"COPY (SELECT $(escaped_columns_list ${cols_to_fetch}) FROM ${table_name} WHERE ${where_clause}) TO STDOUT;\"" > ${database_uri}_${table_name}.csv +} + +function import { + database_uri=${1} + table_name=${2} + cols_to_import=${3} + cols_to_update=${4} + + eval uri='$'${database_uri}_DATABASE_URI + echo -n "Preparing ${table_name}... " + psql ${uri} -c "drop table if exists __copy; create table __copy as (select * from ${table_name} limit 0);" + echo -n "Importing ${table_name}... " + psql ${uri} -c "COPY __copy ($(columns_list ${cols_to_import})) from stdin;" < ${database_uri}_${table_name}.csv + echo -n "Updating existing ${table_name}... " + psql ${uri} -c "update ${table_name} set $(updates_clause $cols_to_update) from __copy where ${table_name}.id = __copy.id;" + echo -n "Inserting new ${table_name}... " + psql ${uri} -c "insert into ${table_name} ($(columns_list ${cols_to_import})) (select $(copy_clause ${cols_to_import}) from __copy left join ${table_name} using(id) where ${table_name} is null);" + echo -n "Cleaning up ${table_name}... " + psql ${uri} -c "drop table if exists __copy;" +} + +# ---- apps +fetch FRONT apps "id createdAt updatedAt sId name description visibility savedSpecification savedConfig savedRun dustAPIProjectId workspaceId" "\\\"workspaceId\\\"=5069" +cat FRONT_apps.csv | cut -f1-11 | sed -E "s/^(.*)$/\1\t${TARGET_WORKSPACE_ID}/g" > FRONT_apps_transformed.csv +mv FRONT_apps_transformed.csv FRONT_apps.csv +import FRONT apps "id createdAt updatedAt sId name description visibility savedSpecification savedConfig savedRun dustAPIProjectId workspaceId" "updatedAt name description visibility savedSpecification savedConfig savedRun dustAPIProjectId" + +# ---- datasets +fetch FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "\\\"workspaceId\\\"=5069" +cat FRONT_datasets.csv | cut -f1-7 | sed -E "s/^(.*)$/\1\t${TARGET_WORKSPACE_ID}/g" > FRONT_datasets_transformed.csv +mv FRONT_datasets_transformed.csv FRONT_datasets.csv +import FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "updatedAt name description schema" + +project_ids=$(cut -f 11 FRONT_apps.csv |paste -sd "," -) + +# ---- projects +fetch CORE projects "id" "\\\"id\\\" in (${project_ids})" +import CORE projects "id" "id" + +# ---- specifications +fetch CORE specifications "id project created hash specification" "\\\"project\\\" in (${project_ids})" +import CORE specifications "id project created hash specification" "hash specification" + +# ---- datasets +fetch CORE datasets "id project created dataset_id hash" "\\\"project\\\" in (${project_ids})" +dataset_ids=$(cut -f 1 CORE_datasets.csv |paste -sd "," -) +fetch CORE datasets_joins "id dataset point point_idx" "\\\"dataset\\\" in (${dataset_ids})" +dataset_points_ids=$(cut -f 3 CORE_datasets_joins.csv |paste -sd "," -) +fetch CORE datasets_points "id hash json" "\\\"id\\\" in (${dataset_points_ids})" + +import CORE datasets "id project created dataset_id hash" "hash" +import CORE datasets_points "id hash json" "hash json" +import CORE datasets_joins " id dataset point point_idx" "point point_idx" From 754e1c93926648965485eadf47a90758b81fcbdc Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Mon, 19 Aug 2024 17:01:06 +0200 Subject: [PATCH 02/14] check env var, delete files after execution --- front/admin/copy_apps.sh | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/front/admin/copy_apps.sh b/front/admin/copy_apps.sh index d79e8aa6a40c..c8ac733d6df5 100755 --- a/front/admin/copy_apps.sh +++ b/front/admin/copy_apps.sh @@ -1,7 +1,13 @@ #!/bin/bash -TARGET_WORKSPACE_ID=1 -FRONT_DATABASE_URI="postgres://dev:xxx@localhost/dust_front" -CORE_DATABASE_URI="postgres://dev:xxx@localhost/dust_api" +DIR=$(dirname $0) + +if [ -z "$TARGET_WORKSPACE_ID" ] +then + echo "Please set TARGET_WORKSPACE_ID to set where to create dust-apps." + exit 1 +fi + +mkdir /tmp/dust-apps echo "Will copy apps into workspace ${TARGET_WORKSPACE_ID}..." echo "You'll have to manually update front/lib/api/config.ts to use localhost:3000 instead of dust.tt," @@ -31,7 +37,7 @@ function fetch { where_clause=${4} echo "Fetching ${table_name} from ${PRODBOX_POD_NAME}..." - kubectl exec -it ${PRODBOX_POD_NAME} -- bash -c "psql \$${database_uri}_DATABASE_URI -c \"COPY (SELECT $(escaped_columns_list ${cols_to_fetch}) FROM ${table_name} WHERE ${where_clause}) TO STDOUT;\"" > ${database_uri}_${table_name}.csv + kubectl exec -it ${PRODBOX_POD_NAME} -- bash -c "psql \$${database_uri}_DATABASE_URI -c \"COPY (SELECT $(escaped_columns_list ${cols_to_fetch}) FROM ${table_name} WHERE ${where_clause}) TO STDOUT;\"" > /tmp/dust-apps/${database_uri}_${table_name}.csv } function import { @@ -44,7 +50,7 @@ function import { echo -n "Preparing ${table_name}... " psql ${uri} -c "drop table if exists __copy; create table __copy as (select * from ${table_name} limit 0);" echo -n "Importing ${table_name}... " - psql ${uri} -c "COPY __copy ($(columns_list ${cols_to_import})) from stdin;" < ${database_uri}_${table_name}.csv + psql ${uri} -c "COPY __copy ($(columns_list ${cols_to_import})) from stdin;" < /tmp/dust-apps/${database_uri}_${table_name}.csv echo -n "Updating existing ${table_name}... " psql ${uri} -c "update ${table_name} set $(updates_clause $cols_to_update) from __copy where ${table_name}.id = __copy.id;" echo -n "Inserting new ${table_name}... " @@ -85,3 +91,5 @@ fetch CORE datasets_points "id hash json" "\\\"id\\\" in (${dataset_points_ids}) import CORE datasets "id project created dataset_id hash" "hash" import CORE datasets_points "id hash json" "hash json" import CORE datasets_joins " id dataset point point_idx" "point point_idx" + +rm -R /tmp/dust-apps \ No newline at end of file From 6e9b0d6484becff2032985d88da61babd3d2cae5 Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Tue, 20 Aug 2024 14:16:17 +0200 Subject: [PATCH 03/14] Check if apps are synced before running --- .husky/post-merge | 6 +++ .husky/pre-rebase | 6 +++ front/admin/cli.ts | 27 ++++++++++ front/admin/copy_apps.sh | 114 ++++++++++++++++++++++----------------- 4 files changed, 103 insertions(+), 50 deletions(-) create mode 100755 .husky/post-merge create mode 100755 .husky/pre-rebase diff --git a/.husky/post-merge b/.husky/post-merge new file mode 100755 index 000000000000..26cd39742be9 --- /dev/null +++ b/.husky/post-merge @@ -0,0 +1,6 @@ +#!/bin/sh + +echo "Running post-merge hook" +./front/admin/copy_apps.sh + +exit 0 diff --git a/.husky/pre-rebase b/.husky/pre-rebase new file mode 100755 index 000000000000..a8d5b1cb948b --- /dev/null +++ b/.husky/pre-rebase @@ -0,0 +1,6 @@ +#!/bin/bash + +echo "Running pre-rebase hook" +./front/admin/copy_apps.sh + +exit 0 diff --git a/front/admin/cli.ts b/front/admin/cli.ts index 2e980e2b3f6d..3655f93d78bc 100644 --- a/front/admin/cli.ts +++ b/front/admin/cli.ts @@ -1,6 +1,8 @@ +import type { DustRegistryActionName } from "@dust-tt/types"; import { CLAUDE_3_OPUS_DEFAULT_MODEL_CONFIG, ConnectorsAPI, + DustProdActionRegistry, removeNulls, SUPPORTED_MODEL_CONFIGS, } from "@dust-tt/types"; @@ -185,6 +187,11 @@ const workspace = async (command: string, args: parseArgs.ParsedArgs) => { return; } + case "dump-registry": { + console.log(DustProdActionRegistry); + return; + } + default: console.log(`Unknown workspace command: ${command}`); console.log( @@ -560,6 +567,24 @@ const transcripts = async (command: string, args: parseArgs.ParsedArgs) => { } }; +const registry = async (command: string, args: parseArgs.ParsedArgs) => { + switch (command) { + case "dump": { + Object.keys(DustProdActionRegistry).forEach((key) => { + const appName = key as DustRegistryActionName; + console.log( + `${DustProdActionRegistry[appName].app.workspaceId}|${key}|${DustProdActionRegistry[appName].app.appId}|${DustProdActionRegistry[appName].app.appHash}` + ); + }); + return; + } + + default: + console.log(`Unknown workspace command: ${command}`); + console.log("Possible values: `dump`"); + } +}; + const main = async () => { const argv = parseArgs(process.argv.slice(2)); @@ -589,6 +614,8 @@ const main = async () => { return conversation(command, argv); case "transcripts": return transcripts(command, argv); + case "registry": + return registry(command, argv); default: console.log( "Unknown object type, possible values: `workspace`, `user`, `data-source`, `event-schema`, `conversation`, `transcripts`" diff --git a/front/admin/copy_apps.sh b/front/admin/copy_apps.sh index c8ac733d6df5..1df151c7739c 100755 --- a/front/admin/copy_apps.sh +++ b/front/admin/copy_apps.sh @@ -1,23 +1,6 @@ #!/bin/bash DIR=$(dirname $0) -if [ -z "$TARGET_WORKSPACE_ID" ] -then - echo "Please set TARGET_WORKSPACE_ID to set where to create dust-apps." - exit 1 -fi - -mkdir /tmp/dust-apps - -echo "Will copy apps into workspace ${TARGET_WORKSPACE_ID}..." -echo "You'll have to manually update front/lib/api/config.ts to use localhost:3000 instead of dust.tt," -echo "and front/lib/development.ts / types/src/front/lib/actions/registry.ts to set your workspace sId in PRODUCTION_DUST_APPS_WORKSPACE_ID" -echo "Ensure you have valid env variables for DUST_MANAGED_ANTHROPIC_API_KEY, DUST_MANAGED_SERP_API_KEY and DUST_MANAGED_BROWSERLESS_API_KEY." -set -e - -echo "Fetching prodbox pod..." -PRODBOX_POD_NAME=$(kubectl get pods |grep prodbox |cut -d \ -f1) - function escaped_columns_list { echo $* | sed -E 's/ /,/g'| sed -E 's/([a-zA-Z_]+)/\\"\1\\"/g' } @@ -53,43 +36,74 @@ function import { psql ${uri} -c "COPY __copy ($(columns_list ${cols_to_import})) from stdin;" < /tmp/dust-apps/${database_uri}_${table_name}.csv echo -n "Updating existing ${table_name}... " psql ${uri} -c "update ${table_name} set $(updates_clause $cols_to_update) from __copy where ${table_name}.id = __copy.id;" - echo -n "Inserting new ${table_name}... " + echo -n "Inserting new ${table_name}..." psql ${uri} -c "insert into ${table_name} ($(columns_list ${cols_to_import})) (select $(copy_clause ${cols_to_import}) from __copy left join ${table_name} using(id) where ${table_name} is null);" echo -n "Cleaning up ${table_name}... " psql ${uri} -c "drop table if exists __copy;" } -# ---- apps -fetch FRONT apps "id createdAt updatedAt sId name description visibility savedSpecification savedConfig savedRun dustAPIProjectId workspaceId" "\\\"workspaceId\\\"=5069" -cat FRONT_apps.csv | cut -f1-11 | sed -E "s/^(.*)$/\1\t${TARGET_WORKSPACE_ID}/g" > FRONT_apps_transformed.csv -mv FRONT_apps_transformed.csv FRONT_apps.csv -import FRONT apps "id createdAt updatedAt sId name description visibility savedSpecification savedConfig savedRun dustAPIProjectId workspaceId" "updatedAt name description visibility savedSpecification savedConfig savedRun dustAPIProjectId" - -# ---- datasets -fetch FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "\\\"workspaceId\\\"=5069" -cat FRONT_datasets.csv | cut -f1-7 | sed -E "s/^(.*)$/\1\t${TARGET_WORKSPACE_ID}/g" > FRONT_datasets_transformed.csv -mv FRONT_datasets_transformed.csv FRONT_datasets.csv -import FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "updatedAt name description schema" - -project_ids=$(cut -f 11 FRONT_apps.csv |paste -sd "," -) - -# ---- projects -fetch CORE projects "id" "\\\"id\\\" in (${project_ids})" -import CORE projects "id" "id" - -# ---- specifications -fetch CORE specifications "id project created hash specification" "\\\"project\\\" in (${project_ids})" -import CORE specifications "id project created hash specification" "hash specification" - -# ---- datasets -fetch CORE datasets "id project created dataset_id hash" "\\\"project\\\" in (${project_ids})" -dataset_ids=$(cut -f 1 CORE_datasets.csv |paste -sd "," -) -fetch CORE datasets_joins "id dataset point point_idx" "\\\"dataset\\\" in (${dataset_ids})" -dataset_points_ids=$(cut -f 3 CORE_datasets_joins.csv |paste -sd "," -) -fetch CORE datasets_points "id hash json" "\\\"id\\\" in (${dataset_points_ids})" - -import CORE datasets "id project created dataset_id hash" "hash" -import CORE datasets_points "id hash json" "hash json" -import CORE datasets_joins " id dataset point point_idx" "point point_idx" +if [ -z "$DUST_APPS_SYNC_WORKSPACE_ID" ] +then + echo "Please set DUST_APPS_SYNC_WORKSPACE_ID if you want to synchronize dust-apps." + exit 1 +fi + +mkdir -p /tmp/dust-apps + +cd ${DIR}/.. + +./admin/cli.sh registry dump > /tmp/dust-apps/specs 2> /dev/null + +REGISTRY_COUNT=$(cat /tmp/dust-apps/specs | wc -w) +IN_CLAUSE=$(cat /tmp/dust-apps/specs | cut -f4 -d\| | sed -E "s/(.*)/'\\1'/g" | paste -sd "," - ) +LOCAL_COUNT=$(psql $CORE_DATABASE_URI -c "copy (select count(distinct(hash)) from specifications where hash in (${IN_CLAUSE})) to stdout") + +if [ $REGISTRY_COUNT -eq $LOCAL_COUNT ] +then + echo "All apps available, skipping." +else + echo "Will copy apps into workspace ${DUST_APPS_SYNC_WORKSPACE_ID}..." + echo "You'll have to manually update front/lib/api/config.ts to use localhost:3000 instead of dust.tt," + echo "and front/lib/development.ts / types/src/front/lib/actions/registry.ts to set your workspace sId in PRODUCTION_DUST_APPS_WORKSPACE_ID" + echo "Ensure you have valid env variables for DUST_MANAGED_ANTHROPIC_API_KEY, DUST_MANAGED_SERP_API_KEY and DUST_MANAGED_BROWSERLESS_API_KEY." + set -e + + echo "Fetching prodbox pod..." + PRODBOX_POD_NAME=$(kubectl get pods |grep prodbox |cut -d \ -f1) + + # ---- apps + fetch FRONT apps "id createdAt updatedAt sId name description visibility savedSpecification savedConfig savedRun dustAPIProjectId workspaceId" "\\\"workspaceId\\\"=5069" + cat /tmp/dust-apps/FRONT_apps.csv | cut -f1-11 | sed -E "s/^(.*)$/\1\t${DUST_APPS_SYNC_WORKSPACE_ID}/g" > /tmp/dust-apps/FRONT_apps_transformed.csv + mv /tmp/dust-apps/FRONT_apps_transformed.csv /tmp/dust-apps/FRONT_apps.csv + import FRONT apps "id createdAt updatedAt sId name description visibility savedSpecification savedConfig savedRun dustAPIProjectId workspaceId" "updatedAt name description visibility savedSpecification savedConfig savedRun dustAPIProjectId" + + # ---- datasets + fetch FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "\\\"workspaceId\\\"=5069" + cat /tmp/dust-apps/FRONT_datasets.csv | cut -f1-7 | sed -E "s/^(.*)$/\1\t${DUST_APPS_SYNC_WORKSPACE_ID}/g" > /tmp/dust-apps/FRONT_datasets_transformed.csv + mv /tmp/dust-apps/FRONT_datasets_transformed.csv /tmp/dust-apps/FRONT_datasets.csv + import FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "updatedAt name description schema" + + project_ids=$(cut -f 11 FRONT_apps.csv |paste -sd "," -) + + # ---- projects + fetch CORE projects "id" "\\\"id\\\" in (${project_ids})" + import CORE projects "id" "id" + + # ---- specifications + fetch CORE specifications "id project created hash specification" "\\\"project\\\" in (${project_ids})" + import CORE specifications "id project created hash specification" "hash specification" + + # ---- datasets + fetch CORE datasets "id project created dataset_id hash" "\\\"project\\\" in (${project_ids})" + dataset_ids=$(cut -f 1 CORE_datasets.csv |paste -sd "," -) + fetch CORE datasets_joins "id dataset point point_idx" "\\\"dataset\\\" in (${dataset_ids})" + dataset_points_ids=$(cut -f 3 CORE_datasets_joins.csv |paste -sd "," -) + fetch CORE datasets_points "id hash json" "\\\"id\\\" in (${dataset_points_ids})" + + import CORE datasets "id project created dataset_id hash" "hash" + import CORE datasets_points "id hash json" "hash json" + import CORE datasets_joins " id dataset point point_idx" "point point_idx" + +fi rm -R /tmp/dust-apps \ No newline at end of file From 3d4104513d66939d527752d5e1b9f475a97b4777 Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Tue, 20 Aug 2024 14:17:51 +0200 Subject: [PATCH 04/14] don't fail if env var is not set --- front/admin/copy_apps.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/front/admin/copy_apps.sh b/front/admin/copy_apps.sh index 1df151c7739c..f418ea346271 100755 --- a/front/admin/copy_apps.sh +++ b/front/admin/copy_apps.sh @@ -45,7 +45,7 @@ function import { if [ -z "$DUST_APPS_SYNC_WORKSPACE_ID" ] then echo "Please set DUST_APPS_SYNC_WORKSPACE_ID if you want to synchronize dust-apps." - exit 1 + exit 0 fi mkdir -p /tmp/dust-apps From bd60b6c37360df9cce6cca7adf6343d464bc8372 Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Tue, 20 Aug 2024 17:41:03 +0200 Subject: [PATCH 05/14] Update front/admin/copy_apps.sh Co-authored-by: Flavien David --- front/admin/copy_apps.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/front/admin/copy_apps.sh b/front/admin/copy_apps.sh index f418ea346271..c1bcd4b2b77b 100755 --- a/front/admin/copy_apps.sh +++ b/front/admin/copy_apps.sh @@ -55,7 +55,7 @@ cd ${DIR}/.. ./admin/cli.sh registry dump > /tmp/dust-apps/specs 2> /dev/null REGISTRY_COUNT=$(cat /tmp/dust-apps/specs | wc -w) -IN_CLAUSE=$(cat /tmp/dust-apps/specs | cut -f4 -d\| | sed -E "s/(.*)/'\\1'/g" | paste -sd "," - ) +IN_CLAUSE=$(cat /tmp/dust-apps/specs | cut -f4 -d\| | sed -E "s/(.*)/'\\1'/g" | paste -sd "," - ) LOCAL_COUNT=$(psql $CORE_DATABASE_URI -c "copy (select count(distinct(hash)) from specifications where hash in (${IN_CLAUSE})) to stdout") if [ $REGISTRY_COUNT -eq $LOCAL_COUNT ] From 8a26db86ea3b75eb40647fa67fa93203eab3abc5 Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Tue, 20 Aug 2024 17:55:57 +0200 Subject: [PATCH 06/14] use json/jq --- front/admin/cli.ts | 14 ++------------ front/admin/copy_apps.sh | 5 ++--- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/front/admin/cli.ts b/front/admin/cli.ts index 3655f93d78bc..4a3144c21281 100644 --- a/front/admin/cli.ts +++ b/front/admin/cli.ts @@ -187,11 +187,6 @@ const workspace = async (command: string, args: parseArgs.ParsedArgs) => { return; } - case "dump-registry": { - console.log(DustProdActionRegistry); - return; - } - default: console.log(`Unknown workspace command: ${command}`); console.log( @@ -567,15 +562,10 @@ const transcripts = async (command: string, args: parseArgs.ParsedArgs) => { } }; -const registry = async (command: string, args: parseArgs.ParsedArgs) => { +const registry = async (command: string) => { switch (command) { case "dump": { - Object.keys(DustProdActionRegistry).forEach((key) => { - const appName = key as DustRegistryActionName; - console.log( - `${DustProdActionRegistry[appName].app.workspaceId}|${key}|${DustProdActionRegistry[appName].app.appId}|${DustProdActionRegistry[appName].app.appHash}` - ); - }); + console.log(JSON.stringify(DustProdActionRegistry)); return; } diff --git a/front/admin/copy_apps.sh b/front/admin/copy_apps.sh index c1bcd4b2b77b..b32edc2ad813 100755 --- a/front/admin/copy_apps.sh +++ b/front/admin/copy_apps.sh @@ -53,9 +53,8 @@ mkdir -p /tmp/dust-apps cd ${DIR}/.. ./admin/cli.sh registry dump > /tmp/dust-apps/specs 2> /dev/null - -REGISTRY_COUNT=$(cat /tmp/dust-apps/specs | wc -w) -IN_CLAUSE=$(cat /tmp/dust-apps/specs | cut -f4 -d\| | sed -E "s/(.*)/'\\1'/g" | paste -sd "," - ) +REGISTRY_COUNT=$(cat /tmp/dust-apps/specs | jq -r '[.[].app.appHash] | join("\n")' | wc -l) +IN_CLAUSE=$(cat /tmp/dust-apps/specs | jq -r '[.[].app.appHash] | map("'"'"'" + . + "'"'"'") | join(",")' ) LOCAL_COUNT=$(psql $CORE_DATABASE_URI -c "copy (select count(distinct(hash)) from specifications where hash in (${IN_CLAUSE})) to stdout") if [ $REGISTRY_COUNT -eq $LOCAL_COUNT ] From 5fcecdccf3f1b8a30151327292b72309e4bb4706 Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Tue, 20 Aug 2024 17:58:15 +0200 Subject: [PATCH 07/14] typo --- front/admin/cli.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/front/admin/cli.ts b/front/admin/cli.ts index 4a3144c21281..6a64ec1f5f58 100644 --- a/front/admin/cli.ts +++ b/front/admin/cli.ts @@ -570,7 +570,7 @@ const registry = async (command: string) => { } default: - console.log(`Unknown workspace command: ${command}`); + console.log(`Unknown registry command: ${command}`); console.log("Possible values: `dump`"); } }; From 339f817cda30cb7f02db40604c6396c5f770e73d Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Tue, 20 Aug 2024 18:06:44 +0200 Subject: [PATCH 08/14] run only on main branch --- .husky/post-merge | 7 +++++-- .husky/pre-rebase | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.husky/post-merge b/.husky/post-merge index 26cd39742be9..7009796b58ac 100755 --- a/.husky/post-merge +++ b/.husky/post-merge @@ -1,6 +1,9 @@ #!/bin/sh -echo "Running post-merge hook" -./front/admin/copy_apps.sh +current_branch=$(git symbolic-ref --short HEAD) +if [ "$current_branch" = "main" ] +then + ./front/admin/copy_apps.sh +fi exit 0 diff --git a/.husky/pre-rebase b/.husky/pre-rebase index a8d5b1cb948b..7ea4095b89b4 100755 --- a/.husky/pre-rebase +++ b/.husky/pre-rebase @@ -1,6 +1,9 @@ #!/bin/bash -echo "Running pre-rebase hook" -./front/admin/copy_apps.sh +current_branch=$(git symbolic-ref --short HEAD) +if [ "$current_branch" = "main" ] +then + ./front/admin/copy_apps.sh +fi exit 0 From e32d9ec38cb27ff6785338fc35eded97420f96de Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Tue, 20 Aug 2024 18:14:15 +0200 Subject: [PATCH 09/14] fix jq command --- front/admin/copy_apps.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/front/admin/copy_apps.sh b/front/admin/copy_apps.sh index b32edc2ad813..a363e9296d73 100755 --- a/front/admin/copy_apps.sh +++ b/front/admin/copy_apps.sh @@ -53,8 +53,12 @@ mkdir -p /tmp/dust-apps cd ${DIR}/.. ./admin/cli.sh registry dump > /tmp/dust-apps/specs 2> /dev/null + +# Get the number of apps in the registry REGISTRY_COUNT=$(cat /tmp/dust-apps/specs | jq -r '[.[].app.appHash] | join("\n")' | wc -l) -IN_CLAUSE=$(cat /tmp/dust-apps/specs | jq -r '[.[].app.appHash] | map("'"'"'" + . + "'"'"'") | join(",")' ) + +# Reads appHash values from JSON, escapes them for shell usage, and concatenates them with commas for SQL queries. +IN_CLAUSE=$(jq -r '[.[].app.appHash] | map("\(. | @sh)") | join(",")' /tmp/dust-apps/specs) LOCAL_COUNT=$(psql $CORE_DATABASE_URI -c "copy (select count(distinct(hash)) from specifications where hash in (${IN_CLAUSE})) to stdout") if [ $REGISTRY_COUNT -eq $LOCAL_COUNT ] From bd1ae822f698101fad38815f14902471bd41c5b2 Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Tue, 20 Aug 2024 18:19:19 +0200 Subject: [PATCH 10/14] fix --- front/admin/copy_apps.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/front/admin/copy_apps.sh b/front/admin/copy_apps.sh index a363e9296d73..48afcbead4b2 100755 --- a/front/admin/copy_apps.sh +++ b/front/admin/copy_apps.sh @@ -86,7 +86,7 @@ else mv /tmp/dust-apps/FRONT_datasets_transformed.csv /tmp/dust-apps/FRONT_datasets.csv import FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "updatedAt name description schema" - project_ids=$(cut -f 11 FRONT_apps.csv |paste -sd "," -) + project_ids=$(cut -f 11 /tmp/dust-apps/FRONT_apps.csv |paste -sd "," -) # ---- projects fetch CORE projects "id" "\\\"id\\\" in (${project_ids})" @@ -98,9 +98,9 @@ else # ---- datasets fetch CORE datasets "id project created dataset_id hash" "\\\"project\\\" in (${project_ids})" - dataset_ids=$(cut -f 1 CORE_datasets.csv |paste -sd "," -) + dataset_ids=$(cut -f 1 /tmp/dust-apps/CORE_datasets.csv |paste -sd "," -) fetch CORE datasets_joins "id dataset point point_idx" "\\\"dataset\\\" in (${dataset_ids})" - dataset_points_ids=$(cut -f 3 CORE_datasets_joins.csv |paste -sd "," -) + dataset_points_ids=$(cut -f 3 /tmp/dust-apps/CORE_datasets_joins.csv |paste -sd "," -) fetch CORE datasets_points "id hash json" "\\\"id\\\" in (${dataset_points_ids})" import CORE datasets "id project created dataset_id hash" "hash" From dab184ce319e93fa9c3a77c580c7d7ea2fbc5e26 Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Tue, 20 Aug 2024 19:07:44 +0200 Subject: [PATCH 11/14] Add --force option --- front/admin/copy_apps.sh | 108 ++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/front/admin/copy_apps.sh b/front/admin/copy_apps.sh index 48afcbead4b2..f31b64e1697f 100755 --- a/front/admin/copy_apps.sh +++ b/front/admin/copy_apps.sh @@ -52,61 +52,65 @@ mkdir -p /tmp/dust-apps cd ${DIR}/.. -./admin/cli.sh registry dump > /tmp/dust-apps/specs 2> /dev/null - -# Get the number of apps in the registry -REGISTRY_COUNT=$(cat /tmp/dust-apps/specs | jq -r '[.[].app.appHash] | join("\n")' | wc -l) +if [ "$1" != "--force" ] +then + ./admin/cli.sh registry dump > /tmp/dust-apps/specs 2> /dev/null -# Reads appHash values from JSON, escapes them for shell usage, and concatenates them with commas for SQL queries. -IN_CLAUSE=$(jq -r '[.[].app.appHash] | map("\(. | @sh)") | join(",")' /tmp/dust-apps/specs) -LOCAL_COUNT=$(psql $CORE_DATABASE_URI -c "copy (select count(distinct(hash)) from specifications where hash in (${IN_CLAUSE})) to stdout") + # Get the number of apps in the registry + REGISTRY_COUNT=$(cat /tmp/dust-apps/specs | jq -r '[.[].app.appHash] | join("\n")' | wc -l) -if [ $REGISTRY_COUNT -eq $LOCAL_COUNT ] -then - echo "All apps available, skipping." -else - echo "Will copy apps into workspace ${DUST_APPS_SYNC_WORKSPACE_ID}..." - echo "You'll have to manually update front/lib/api/config.ts to use localhost:3000 instead of dust.tt," - echo "and front/lib/development.ts / types/src/front/lib/actions/registry.ts to set your workspace sId in PRODUCTION_DUST_APPS_WORKSPACE_ID" - echo "Ensure you have valid env variables for DUST_MANAGED_ANTHROPIC_API_KEY, DUST_MANAGED_SERP_API_KEY and DUST_MANAGED_BROWSERLESS_API_KEY." - set -e - - echo "Fetching prodbox pod..." - PRODBOX_POD_NAME=$(kubectl get pods |grep prodbox |cut -d \ -f1) - - # ---- apps - fetch FRONT apps "id createdAt updatedAt sId name description visibility savedSpecification savedConfig savedRun dustAPIProjectId workspaceId" "\\\"workspaceId\\\"=5069" - cat /tmp/dust-apps/FRONT_apps.csv | cut -f1-11 | sed -E "s/^(.*)$/\1\t${DUST_APPS_SYNC_WORKSPACE_ID}/g" > /tmp/dust-apps/FRONT_apps_transformed.csv - mv /tmp/dust-apps/FRONT_apps_transformed.csv /tmp/dust-apps/FRONT_apps.csv - import FRONT apps "id createdAt updatedAt sId name description visibility savedSpecification savedConfig savedRun dustAPIProjectId workspaceId" "updatedAt name description visibility savedSpecification savedConfig savedRun dustAPIProjectId" - - # ---- datasets - fetch FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "\\\"workspaceId\\\"=5069" - cat /tmp/dust-apps/FRONT_datasets.csv | cut -f1-7 | sed -E "s/^(.*)$/\1\t${DUST_APPS_SYNC_WORKSPACE_ID}/g" > /tmp/dust-apps/FRONT_datasets_transformed.csv - mv /tmp/dust-apps/FRONT_datasets_transformed.csv /tmp/dust-apps/FRONT_datasets.csv - import FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "updatedAt name description schema" - - project_ids=$(cut -f 11 /tmp/dust-apps/FRONT_apps.csv |paste -sd "," -) - - # ---- projects - fetch CORE projects "id" "\\\"id\\\" in (${project_ids})" - import CORE projects "id" "id" - - # ---- specifications - fetch CORE specifications "id project created hash specification" "\\\"project\\\" in (${project_ids})" - import CORE specifications "id project created hash specification" "hash specification" - - # ---- datasets - fetch CORE datasets "id project created dataset_id hash" "\\\"project\\\" in (${project_ids})" - dataset_ids=$(cut -f 1 /tmp/dust-apps/CORE_datasets.csv |paste -sd "," -) - fetch CORE datasets_joins "id dataset point point_idx" "\\\"dataset\\\" in (${dataset_ids})" - dataset_points_ids=$(cut -f 3 /tmp/dust-apps/CORE_datasets_joins.csv |paste -sd "," -) - fetch CORE datasets_points "id hash json" "\\\"id\\\" in (${dataset_points_ids})" - - import CORE datasets "id project created dataset_id hash" "hash" - import CORE datasets_points "id hash json" "hash json" - import CORE datasets_joins " id dataset point point_idx" "point point_idx" + # Reads appHash values from JSON, escapes them for shell usage, and concatenates them with commas for SQL queries. + IN_CLAUSE=$(jq -r '[.[].app.appHash] | map("\(. | @sh)") | join(",")' /tmp/dust-apps/specs) + LOCAL_COUNT=$(psql $CORE_DATABASE_URI -c "copy (select count(distinct(hash)) from specifications where hash in (${IN_CLAUSE})) to stdout") + if [ $REGISTRY_COUNT -eq $LOCAL_COUNT ] + then + echo "All apps available, skipping." + rm -R /tmp/dust-apps + exit 0 + fi fi +echo "Will copy apps into workspace ${DUST_APPS_SYNC_WORKSPACE_ID}..." +echo "You'll have to manually update front/lib/api/config.ts to use localhost:3000 instead of dust.tt," +echo "and front/lib/development.ts / types/src/front/lib/actions/registry.ts to set your workspace sId in PRODUCTION_DUST_APPS_WORKSPACE_ID" +echo "Ensure you have valid env variables for DUST_MANAGED_ANTHROPIC_API_KEY, DUST_MANAGED_SERP_API_KEY and DUST_MANAGED_BROWSERLESS_API_KEY." +set -e + +echo "Fetching prodbox pod..." +PRODBOX_POD_NAME=$(kubectl get pods |grep prodbox |cut -d \ -f1) + +# ---- apps +fetch FRONT apps "id createdAt updatedAt sId name description visibility savedSpecification savedConfig savedRun dustAPIProjectId workspaceId" "\\\"workspaceId\\\"=5069" +cat /tmp/dust-apps/FRONT_apps.csv | cut -f1-11 | sed -E "s/^(.*)$/\1\t${DUST_APPS_SYNC_WORKSPACE_ID}/g" > /tmp/dust-apps/FRONT_apps_transformed.csv +mv /tmp/dust-apps/FRONT_apps_transformed.csv /tmp/dust-apps/FRONT_apps.csv +import FRONT apps "id createdAt updatedAt sId name description visibility savedSpecification savedConfig savedRun dustAPIProjectId workspaceId" "updatedAt name description visibility savedSpecification savedConfig savedRun dustAPIProjectId" + +# ---- datasets +fetch FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "\\\"workspaceId\\\"=5069" +cat /tmp/dust-apps/FRONT_datasets.csv | cut -f1-7 | sed -E "s/^(.*)$/\1\t${DUST_APPS_SYNC_WORKSPACE_ID}/g" > /tmp/dust-apps/FRONT_datasets_transformed.csv +mv /tmp/dust-apps/FRONT_datasets_transformed.csv /tmp/dust-apps/FRONT_datasets.csv +import FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "updatedAt name description schema" + +project_ids=$(cut -f 11 /tmp/dust-apps/FRONT_apps.csv |paste -sd "," -) + +# ---- projects +fetch CORE projects "id" "\\\"id\\\" in (${project_ids})" +import CORE projects "id" "id" + +# ---- specifications +fetch CORE specifications "id project created hash specification" "\\\"project\\\" in (${project_ids})" +import CORE specifications "id project created hash specification" "hash specification" + +# ---- datasets +fetch CORE datasets "id project created dataset_id hash" "\\\"project\\\" in (${project_ids})" +dataset_ids=$(cut -f 1 /tmp/dust-apps/CORE_datasets.csv |paste -sd "," -) +fetch CORE datasets_joins "id dataset point point_idx" "\\\"dataset\\\" in (${dataset_ids})" +dataset_points_ids=$(cut -f 3 /tmp/dust-apps/CORE_datasets_joins.csv |paste -sd "," -) +fetch CORE datasets_points "id hash json" "\\\"id\\\" in (${dataset_points_ids})" + +import CORE datasets "id project created dataset_id hash" "hash" +import CORE datasets_points "id hash json" "hash json" +import CORE datasets_joins "id dataset point point_idx" "point point_idx" + rm -R /tmp/dust-apps \ No newline at end of file From 3e7fabf3162093de1d844918c20ee00f7916eac8 Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Wed, 21 Aug 2024 10:19:15 +0200 Subject: [PATCH 12/14] Skip existing datasets points --- front/admin/copy_apps.sh | 60 ++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/front/admin/copy_apps.sh b/front/admin/copy_apps.sh index f31b64e1697f..08068ba0b971 100755 --- a/front/admin/copy_apps.sh +++ b/front/admin/copy_apps.sh @@ -4,6 +4,9 @@ DIR=$(dirname $0) function escaped_columns_list { echo $* | sed -E 's/ /,/g'| sed -E 's/([a-zA-Z_]+)/\\"\1\\"/g' } +function escaped_values_list { + echo $* | sed -E 's/ /,/g'| sed -E 's/([a-zA-Z_]+)/\\\\'\1\\\\'/g' +} function columns_list { echo $* | sed -E 's/ /,/g'| sed -E 's/([a-zA-Z_]+)/"\1"/g' } @@ -28,17 +31,20 @@ function import { table_name=${2} cols_to_import=${3} cols_to_update=${4} + on_conflict=${5} + additional_where=${6} eval uri='$'${database_uri}_DATABASE_URI echo -n "Preparing ${table_name}... " - psql ${uri} -c "drop table if exists __copy; create table __copy as (select * from ${table_name} limit 0);" - echo -n "Importing ${table_name}... " - psql ${uri} -c "COPY __copy ($(columns_list ${cols_to_import})) from stdin;" < /tmp/dust-apps/${database_uri}_${table_name}.csv - echo -n "Updating existing ${table_name}... " - psql ${uri} -c "update ${table_name} set $(updates_clause $cols_to_update) from __copy where ${table_name}.id = __copy.id;" - echo -n "Inserting new ${table_name}..." - psql ${uri} -c "insert into ${table_name} ($(columns_list ${cols_to_import})) (select $(copy_clause ${cols_to_import}) from __copy left join ${table_name} using(id) where ${table_name} is null);" - echo -n "Cleaning up ${table_name}... " + psql ${uri} -c "drop table if exists __copy" > /dev/null 2>&1 + psql ${uri} -c "create table __copy as (select * from ${table_name} limit 0)" | tr -d '\n' + echo -n "... Importing ${table_name}... " + psql ${uri} -c "COPY __copy ($(columns_list ${cols_to_import})) from stdin;" < /tmp/dust-apps/${database_uri}_${table_name}.csv | tr -d '\n' + echo -n "... Updating existing ${table_name}... " + psql ${uri} -c "update ${table_name} set $(updates_clause $cols_to_update) from __copy where ${table_name}.id = __copy.id;" | tr -d '\n' + echo -n "... Inserting new ${table_name}..." + psql ${uri} -c "insert into ${table_name} ($(columns_list ${cols_to_import})) (select $(copy_clause ${cols_to_import}) from __copy left join ${table_name} using(id) where ${table_name} is null ${additional_where}) ${on_conflict};" | tr -d '\n' + echo -n "... Cleaning up ${table_name}... " psql ${uri} -c "drop table if exists __copy;" } @@ -52,6 +58,13 @@ mkdir -p /tmp/dust-apps cd ${DIR}/.. +if [ "$1" == "--cleanup" ] +then + psql ${FRONT_DATABASE_URI} -c "delete from apps where id>1000;" + psql ${CORE_DATABASE_URI} -c "delete from specifications where id>10000;" + psql ${CORE_DATABASE_URI} -c "delete from datasets_joins where id>10000; delete from datasets_points where id>10000; delete from datasets where id>10000;" +fi + if [ "$1" != "--force" ] then ./admin/cli.sh registry dump > /tmp/dust-apps/specs 2> /dev/null @@ -80,37 +93,42 @@ set -e echo "Fetching prodbox pod..." PRODBOX_POD_NAME=$(kubectl get pods |grep prodbox |cut -d \ -f1) -# ---- apps +# ---- front + fetch FRONT apps "id createdAt updatedAt sId name description visibility savedSpecification savedConfig savedRun dustAPIProjectId workspaceId" "\\\"workspaceId\\\"=5069" +project_ids=$(cut -f 11 /tmp/dust-apps/FRONT_apps.csv |paste -sd "," -) +fetch FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "\\\"workspaceId\\\"=5069" + + +# ---- apps cat /tmp/dust-apps/FRONT_apps.csv | cut -f1-11 | sed -E "s/^(.*)$/\1\t${DUST_APPS_SYNC_WORKSPACE_ID}/g" > /tmp/dust-apps/FRONT_apps_transformed.csv mv /tmp/dust-apps/FRONT_apps_transformed.csv /tmp/dust-apps/FRONT_apps.csv import FRONT apps "id createdAt updatedAt sId name description visibility savedSpecification savedConfig savedRun dustAPIProjectId workspaceId" "updatedAt name description visibility savedSpecification savedConfig savedRun dustAPIProjectId" # ---- datasets -fetch FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "\\\"workspaceId\\\"=5069" cat /tmp/dust-apps/FRONT_datasets.csv | cut -f1-7 | sed -E "s/^(.*)$/\1\t${DUST_APPS_SYNC_WORKSPACE_ID}/g" > /tmp/dust-apps/FRONT_datasets_transformed.csv mv /tmp/dust-apps/FRONT_datasets_transformed.csv /tmp/dust-apps/FRONT_datasets.csv import FRONT datasets "id createdAt updatedAt name description schema appId workspaceId" "updatedAt name description schema" -project_ids=$(cut -f 11 /tmp/dust-apps/FRONT_apps.csv |paste -sd "," -) +# ---- core -# ---- projects fetch CORE projects "id" "\\\"id\\\" in (${project_ids})" -import CORE projects "id" "id" - -# ---- specifications fetch CORE specifications "id project created hash specification" "\\\"project\\\" in (${project_ids})" -import CORE specifications "id project created hash specification" "hash specification" - -# ---- datasets fetch CORE datasets "id project created dataset_id hash" "\\\"project\\\" in (${project_ids})" dataset_ids=$(cut -f 1 /tmp/dust-apps/CORE_datasets.csv |paste -sd "," -) fetch CORE datasets_joins "id dataset point point_idx" "\\\"dataset\\\" in (${dataset_ids})" dataset_points_ids=$(cut -f 3 /tmp/dust-apps/CORE_datasets_joins.csv |paste -sd "," -) fetch CORE datasets_points "id hash json" "\\\"id\\\" in (${dataset_points_ids})" +# ---- projects +import CORE projects "id" "id" + +# ---- specifications +import CORE specifications "id project created hash specification" "hash specification" + +# ---- datasets import CORE datasets "id project created dataset_id hash" "hash" -import CORE datasets_points "id hash json" "hash json" -import CORE datasets_joins "id dataset point point_idx" "point point_idx" +import CORE datasets_points "id hash json" "hash json" "on conflict(hash) do nothing" +import CORE datasets_joins "id dataset point point_idx" "point point_idx" "" "and __copy.point in (select id from datasets_points)" -rm -R /tmp/dust-apps \ No newline at end of file +rm -R /tmp/dust-apps From 1160e9faf46d2197b1781898729a1d02392290d7 Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Wed, 21 Aug 2024 10:27:53 +0200 Subject: [PATCH 13/14] fix lint --- front/admin/cli.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/front/admin/cli.ts b/front/admin/cli.ts index 6a64ec1f5f58..15950d4b2226 100644 --- a/front/admin/cli.ts +++ b/front/admin/cli.ts @@ -605,7 +605,7 @@ const main = async () => { case "transcripts": return transcripts(command, argv); case "registry": - return registry(command, argv); + return registry(command); default: console.log( "Unknown object type, possible values: `workspace`, `user`, `data-source`, `event-schema`, `conversation`, `transcripts`" From 3f467606b4dc86a3100012ca1f951840a7ded008 Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Tue, 27 Aug 2024 10:57:59 +0200 Subject: [PATCH 14/14] Handle deleted and duplicate apps --- front/admin/copy_apps.sh | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/front/admin/copy_apps.sh b/front/admin/copy_apps.sh index 08068ba0b971..d3fc5450bbdb 100755 --- a/front/admin/copy_apps.sh +++ b/front/admin/copy_apps.sh @@ -69,14 +69,27 @@ if [ "$1" != "--force" ] then ./admin/cli.sh registry dump > /tmp/dust-apps/specs 2> /dev/null - # Get the number of apps in the registry - REGISTRY_COUNT=$(cat /tmp/dust-apps/specs | jq -r '[.[].app.appHash] | join("\n")' | wc -l) - + # Get the appIds in the registry + REGISTRY_APP_IDS=$(cat /tmp/dust-apps/specs | jq -r '[.[].app.appId] | sort_by(.) | join("\n")') # Reads appHash values from JSON, escapes them for shell usage, and concatenates them with commas for SQL queries. IN_CLAUSE=$(jq -r '[.[].app.appHash] | map("\(. | @sh)") | join(",")' /tmp/dust-apps/specs) - LOCAL_COUNT=$(psql $CORE_DATABASE_URI -c "copy (select count(distinct(hash)) from specifications where hash in (${IN_CLAUSE})) to stdout") - - if [ $REGISTRY_COUNT -eq $LOCAL_COUNT ] + # Get projects matching the current specifications + PROJECTS=$(psql $CORE_DATABASE_URI -c "copy (select distinct(project) from specifications where hash in (${IN_CLAUSE})) to stdout" | sed "s/.*/'&'/" | paste -sd, -) + # Get appIds matching the specifications + LOCAL_APP_IDS=$(psql $FRONT_DATABASE_URI -c "copy (select distinct(\"sId\") from apps where \"dustAPIProjectId\" in (${PROJECTS}) and visibility!='deleted' and \"workspaceId\"=${DUST_APPS_SYNC_WORKSPACE_ID} order by \"sId\") to stdout" | paste -sd\ -) + + # Check if any app is missing + MISSING=false + for item in $REGISTRY_APP_IDS + do + if [[ ! " ${LOCAL_APP_IDS} " =~ " $item " ]] + then + echo "Missing app $item" + MISSING=true + fi + done + + if [ "$MISSING" == "false" ] then echo "All apps available, skipping." rm -R /tmp/dust-apps