From 448a5a2c05a845dd577e757cebe37cb5d1e7c8c4 Mon Sep 17 00:00:00 2001 From: Henry Fontanier Date: Thu, 14 Nov 2024 17:28:37 +0100 Subject: [PATCH] enh: add sId column on content fragments (#8647) * enh: add sId column on content fragments * index concurrently * add backfill script ref in SQL * move index creation to second migration --------- Co-authored-by: Henry Fontanier --- .../resources/content_fragment_resource.ts | 4 +- .../storage/models/content_fragment.ts | 7 ++- front/migrations/20241114_backfill_cf_sid.ts | 62 +++++++++++++++++++ front/migrations/db/migration_111.sql | 5 ++ front/migrations/db/migration_112.sql | 10 +++ 5 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 front/migrations/20241114_backfill_cf_sid.ts create mode 100644 front/migrations/db/migration_111.sql create mode 100644 front/migrations/db/migration_112.sql diff --git a/front/lib/resources/content_fragment_resource.ts b/front/lib/resources/content_fragment_resource.ts index f4e9394b3412..204598c6c59f 100644 --- a/front/lib/resources/content_fragment_resource.ts +++ b/front/lib/resources/content_fragment_resource.ts @@ -23,6 +23,7 @@ import { BaseResource } from "@app/lib/resources/base_resource"; import { FileResource } from "@app/lib/resources/file_resource"; import { ContentFragmentModel } from "@app/lib/resources/storage/models/content_fragment"; import type { ReadonlyAttributesType } from "@app/lib/resources/storage/types"; +import { generateRandomModelSId } from "@app/lib/resources/string_ids"; import logger from "@app/logger/logger"; const MAX_BYTE_SIZE_CSV_RENDER_FULL_CONTENT = 500 * 1024; // 500 KB @@ -45,12 +46,13 @@ export class ContentFragmentResource extends BaseResource<ContentFragmentModel> } static async makeNew( - blob: CreationAttributes<ContentFragmentModel>, + blob: Omit<CreationAttributes<ContentFragmentModel>, "sId">, transaction?: Transaction ) { const contentFragment = await ContentFragmentModel.create( { ...blob, + sId: generateRandomModelSId("cf"), }, { transaction, diff --git 
a/front/lib/resources/storage/models/content_fragment.ts b/front/lib/resources/storage/models/content_fragment.ts index 9c551a11054c..b1f368cca141 100644 --- a/front/lib/resources/storage/models/content_fragment.ts +++ b/front/lib/resources/storage/models/content_fragment.ts @@ -19,6 +19,7 @@ export class ContentFragmentModel extends Model< declare createdAt: CreationOptional<Date>; declare updatedAt: CreationOptional<Date>; + declare sId: string; declare title: string; declare contentType: SupportedContentFragmentType; declare sourceUrl: string | null; // GCS (upload) or Slack or ... @@ -54,6 +55,10 @@ ContentFragmentModel.init( allowNull: false, defaultValue: DataTypes.NOW, }, + sId: { + type: DataTypes.STRING, + allowNull: false, + }, title: { type: DataTypes.TEXT, allowNull: false, @@ -90,7 +95,7 @@ ContentFragmentModel.init( { modelName: "content_fragment", sequelize: frontSequelize, - indexes: [{ fields: ["fileId"] }], + indexes: [{ fields: ["fileId"] }, { fields: ["sId"] }], } ); diff --git a/front/migrations/20241114_backfill_cf_sid.ts b/front/migrations/20241114_backfill_cf_sid.ts new file mode 100644 index 000000000000..e4d77db327d0 --- /dev/null +++ b/front/migrations/20241114_backfill_cf_sid.ts @@ -0,0 +1,62 @@ +import { Op } from "sequelize"; + +import { ContentFragmentModel } from "@app/lib/resources/storage/models/content_fragment"; +import { generateRandomModelSId } from "@app/lib/resources/string_ids"; +import { makeScript } from "@app/scripts/helpers"; + +makeScript({}, async ({ execute }, logger) => { + let lastSeenId = 0; + const batchSize = 1000; + + for (;;) { + // Find content fragments without sId + const contentFragments: ContentFragmentModel[] = + await ContentFragmentModel.findAll({ + // @ts-expect-error -- sequelize type for sId is not nullable (it temporarily is in db) + where: { + id: { + [Op.gt]: lastSeenId, + }, + sId: { + [Op.is]: null, + }, + }, + order: [["id", "ASC"]], + limit: batchSize, + }); + + if (contentFragments.length === 0) { + 
break; + } + + logger.info( + `Processing ${contentFragments.length} content fragments starting from ID ${lastSeenId}` + ); + + if (execute) { + await Promise.all( + contentFragments.map(async (cf) => { + const sId = generateRandomModelSId("cf"); + await cf.update({ sId }); + logger.info( + { + contentFragmentId: cf.id, + sId, + }, + "Updated content fragment with sId" + ); + }) + ); + } else { + logger.info( + { + lastSeenId, + count: contentFragments.length, + }, + "Dry run - would have updated content fragments with sIds" + ); + } + + lastSeenId = contentFragments[contentFragments.length - 1].id; + } +}); diff --git a/front/migrations/db/migration_111.sql b/front/migrations/db/migration_111.sql new file mode 100644 index 000000000000..7aa391f7549a --- /dev/null +++ b/front/migrations/db/migration_111.sql @@ -0,0 +1,5 @@ +-- Migration created on Nov 14, 2024 +ALTER TABLE + "public"."content_fragments" +ADD + COLUMN "sId" VARCHAR(255); \ No newline at end of file diff --git a/front/migrations/db/migration_112.sql b/front/migrations/db/migration_112.sql new file mode 100644 index 000000000000..572b7094c6d2 --- /dev/null +++ b/front/migrations/db/migration_112.sql @@ -0,0 +1,10 @@ +-- Migration created on Nov 14, 2024 +-- Backfill script that needs to be run: 20241114_backfill_cf_sid.ts +ALTER TABLE + "public"."content_fragments" +ALTER COLUMN + "sId" +SET + NOT NULL; + +CREATE INDEX CONCURRENTLY "content_fragments_s_id" ON "content_fragments" ("sId"); \ No newline at end of file