From 517670528a66a8a55b0b10a56801f7fcbb7389c2 Mon Sep 17 00:00:00 2001 From: Aubin <60398825+aubin-tchoi@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:44:58 +0100 Subject: [PATCH] [parents_migration] Add and backfill Google Drive "Shared with me" folders (#9507) * create and delete SharedWithMe folder * add backfill script * add concurrency on the script * fix the internalId of the sharedWithMe folder * use the dataSourceId in the SharedWithMe folder id instead of the connector ID * move the deletion of the shared with me folder to the clean method * use dataSourceId in the folderId for the sharedWithMe folder * delete from core first * remove the dataSourceId from the GOOGLE_DRIVE_SHARED_WITH_ME_VIRTUAL_ID folder * cleanup * remove the delete, leaving it for another task * inline upsertSharedWithMeFolder * move the upsertion to an activity --- ...20241218_backfill_gdrive_shared_with_me.ts | 37 +++++++++++++++++++ .../src/connectors/google_drive/index.ts | 7 +++- .../google_drive/temporal/activities.ts | 25 ++++++++++++- .../google_drive/temporal/workflows.ts | 3 ++ .../src/resources/connector/google_drive.ts | 4 +- 5 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 connectors/migrations/20241218_backfill_gdrive_shared_with_me.ts diff --git a/connectors/migrations/20241218_backfill_gdrive_shared_with_me.ts b/connectors/migrations/20241218_backfill_gdrive_shared_with_me.ts new file mode 100644 index 000000000000..d7e787e7edfe --- /dev/null +++ b/connectors/migrations/20241218_backfill_gdrive_shared_with_me.ts @@ -0,0 +1,37 @@ +import { makeScript } from "scripts/helpers"; + +import { GOOGLE_DRIVE_SHARED_WITH_ME_VIRTUAL_ID } from "@connectors/connectors/google_drive/lib/consts"; +import { getInternalId } from "@connectors/connectors/google_drive/temporal/utils"; +import { dataSourceConfigFromConnector } from "@connectors/lib/api/data_source_config"; +import { concurrentExecutor } from "@connectors/lib/async_utils"; +import { upsertDataSourceFolder } from "@connectors/lib/data_sources"; +import { ConnectorResource } from "@connectors/resources/connector_resource"; + +makeScript({}, async ({ execute }, logger) => { + const connectors = await ConnectorResource.listByType("google_drive", {}); + + await concurrentExecutor( + connectors, + async (connector) => { + const folderId = getInternalId(GOOGLE_DRIVE_SHARED_WITH_ME_VIRTUAL_ID); + if (execute) { + await upsertDataSourceFolder({ + dataSourceConfig: dataSourceConfigFromConnector(connector), + folderId, + parents: [folderId], + parentId: null, + title: "Shared with me", + mimeType: "application/vnd.dust.googledrive.folder", + }); + logger.info( + `Upserted folder ${folderId} for connector ${connector.id}` + ); + } else { + logger.info( + `Would upsert folder ${folderId} for connector ${connector.id}` + ); + } + }, + { concurrency: 10 } + ); +}); diff --git a/connectors/src/connectors/google_drive/index.ts b/connectors/src/connectors/google_drive/index.ts index 49ff2b1953f1..300eb24aeb35 100644 --- a/connectors/src/connectors/google_drive/index.ts +++ b/connectors/src/connectors/google_drive/index.ts @@ -409,7 +409,7 @@ export class GoogleDriveConnectorManager extends BaseConnectorManager { // that are not living in a shared drive. nodes.push({ provider: c.type, - internalId: GOOGLE_DRIVE_SHARED_WITH_ME_VIRTUAL_ID, + internalId: getInternalId(GOOGLE_DRIVE_SHARED_WITH_ME_VIRTUAL_ID), parentInternalId: null, type: "folder" as const, preventSelection: true, @@ -435,7 +435,10 @@ export class GoogleDriveConnectorManager extends BaseConnectorManager { // The "Shared with me" view requires to look for folders // with the flag `sharedWithMe=true`, but there is no need to check for the parents. let gdriveQuery = `mimeType='application/vnd.google-apps.folder'`; - if (parentInternalId === GOOGLE_DRIVE_SHARED_WITH_ME_VIRTUAL_ID) { + if ( + parentInternalId === + getInternalId(GOOGLE_DRIVE_SHARED_WITH_ME_VIRTUAL_ID) + ) { gdriveQuery += ` and sharedWithMe=true`; } else { gdriveQuery += ` and '${parentDriveId}' in parents`; diff --git a/connectors/src/connectors/google_drive/temporal/activities.ts b/connectors/src/connectors/google_drive/temporal/activities.ts index 9cab0a3653ca..45566089dc52 100644 --- a/connectors/src/connectors/google_drive/temporal/activities.ts +++ b/connectors/src/connectors/google_drive/temporal/activities.ts @@ -7,7 +7,10 @@ import StatsD from "hot-shots"; import PQueue from "p-queue"; import { Op } from "sequelize"; -import { GOOGLE_DRIVE_USER_SPACE_VIRTUAL_DRIVE_ID } from "@connectors/connectors/google_drive/lib/consts"; +import { + GOOGLE_DRIVE_SHARED_WITH_ME_VIRTUAL_ID, + GOOGLE_DRIVE_USER_SPACE_VIRTUAL_DRIVE_ID, +} from "@connectors/connectors/google_drive/lib/consts"; import { getGoogleDriveObject } from "@connectors/connectors/google_drive/lib/google_drive_api"; import { getFileParentsMemoized } from "@connectors/connectors/google_drive/lib/hierarchy"; import { syncOneFile } from "@connectors/connectors/google_drive/temporal/file"; @@ -54,6 +57,26 @@ type LightGoogleDrive = { export const statsDClient = new StatsD(); +/** + * Upserts to data_sources_folders (core) a top-level folder "Shared with me". + */ +export async function upsertSharedWithMeFolder(connectorId: ModelId) { + const connector = await ConnectorResource.fetchById(connectorId); + if (!connector) { + throw new Error(`Connector ${connectorId} not found`); + } + + const folderId = getInternalId(GOOGLE_DRIVE_SHARED_WITH_ME_VIRTUAL_ID); + await upsertDataSourceFolder({ + dataSourceConfig: dataSourceConfigFromConnector(connector), + folderId, + parents: [folderId], + parentId: null, + title: "Shared with me", + mimeType: "application/vnd.dust.googledrive.folder", + }); +} + export async function getDrives( connectorId: ModelId ): Promise { diff --git a/connectors/src/connectors/google_drive/temporal/workflows.ts b/connectors/src/connectors/google_drive/temporal/workflows.ts index 357e5269d7b8..a416b7edeb44 100644 --- a/connectors/src/connectors/google_drive/temporal/workflows.ts +++ b/connectors/src/connectors/google_drive/temporal/workflows.ts @@ -25,6 +25,7 @@ const { garbageCollectorFinished, markFolderAsVisited, shouldGarbageCollect, + upsertSharedWithMeFolder, } = proxyActivities({ startToCloseTimeout: "20 minutes", }); @@ -104,6 +105,8 @@ export async function googleDriveFullSync({ } }); + await upsertSharedWithMeFolder(connectorId); + // Temp to clean up the running workflows state foldersToBrowse = uniq(foldersToBrowse); diff --git a/connectors/src/resources/connector/google_drive.ts b/connectors/src/resources/connector/google_drive.ts index 152610d7bffe..438707d3608e 100644 --- a/connectors/src/resources/connector/google_drive.ts +++ b/connectors/src/resources/connector/google_drive.ts @@ -3,11 +3,9 @@ import type { Transaction } from "sequelize"; import { GoogleDriveConfig, - GoogleDriveSheet, -} from "@connectors/lib/models/google_drive"; -import { GoogleDriveFiles, GoogleDriveFolders, + GoogleDriveSheet, GoogleDriveSyncToken, } from "@connectors/lib/models/google_drive"; import type {