diff --git a/connectors/migrations/20241216_backfill_confluence_folders.ts b/connectors/migrations/20241216_backfill_confluence_folders.ts new file mode 100644 index 000000000000..e2f3888091ab --- /dev/null +++ b/connectors/migrations/20241216_backfill_confluence_folders.ts @@ -0,0 +1,43 @@ +import { makeScript } from "scripts/helpers"; + +import { makeSpaceInternalId } from "@connectors/connectors/confluence/lib/internal_ids"; +import { dataSourceConfigFromConnector } from "@connectors/lib/api/data_source_config"; +import { concurrentExecutor } from "@connectors/lib/async_utils"; +import { upsertFolderNode } from "@connectors/lib/data_sources"; +import { ConfluenceSpace } from "@connectors/lib/models/confluence"; +import { ConnectorResource } from "@connectors/resources/connector_resource"; + +const FOLDER_CONCURRENCY = 10; + +makeScript({}, async ({ execute }, logger) => { + const connectors = await ConnectorResource.listByType("confluence", {}); + + for (const connector of connectors) { + const confluenceSpaces = await ConfluenceSpace.findAll({ + attributes: ["spaceId", "name"], + where: { connectorId: connector.id }, + }); + const dataSourceConfig = dataSourceConfigFromConnector(connector); + if (execute) { + await concurrentExecutor( + confluenceSpaces, + async (space) => { + await upsertFolderNode({ + dataSourceConfig, + folderId: makeSpaceInternalId(space.spaceId), + parents: [makeSpaceInternalId(space.spaceId)], + title: space.name, + }); + }, + { concurrency: FOLDER_CONCURRENCY } + ); + logger.info( + `Upserted ${confluenceSpaces.length} spaces for connector ${connector.id}` + ); + } else { + logger.info( + `Found ${confluenceSpaces.length} spaces for connector ${connector.id}` + ); + } + } +}); diff --git a/connectors/src/connectors/confluence/temporal/activities.ts b/connectors/src/connectors/confluence/temporal/activities.ts index af5104ea2afd..d5aed2ca4d24 100644 --- a/connectors/src/connectors/confluence/temporal/activities.ts +++ 
b/connectors/src/connectors/confluence/temporal/activities.ts @@ -18,16 +18,21 @@ import { getConfluencePageParentIds, getSpaceHierarchy, } from "@connectors/connectors/confluence/lib/hierarchy"; -import { makePageInternalId } from "@connectors/connectors/confluence/lib/internal_ids"; +import { + makePageInternalId, + makeSpaceInternalId, +} from "@connectors/connectors/confluence/lib/internal_ids"; import { makeConfluenceDocumentUrl } from "@connectors/connectors/confluence/temporal/workflow_ids"; import { dataSourceConfigFromConnector } from "@connectors/lib/api/data_source_config"; import { concurrentExecutor } from "@connectors/lib/async_utils"; import type { UpsertToDataSourceParams } from "@connectors/lib/data_sources"; import { + deleteFolderNode, deleteFromDataSource, renderDocumentTitleAndContent, renderMarkdownSection, updateDocumentParentsField, + upsertFolderNode, upsertToDatasource, } from "@connectors/lib/data_sources"; import { @@ -196,6 +201,28 @@ export async function confluenceGetSpaceNameActivity({ } } +/** + * Upserts the space folder in data_sources_folders (core). 
+ */ +export async function confluenceUpsertSpaceFolderActivity({ + connectorId, + spaceId, + spaceName, +}: { + connectorId: ModelId; + spaceId: string; + spaceName: string; +}) { + const connector = await fetchConfluenceConnector(connectorId); + + await upsertFolderNode({ + dataSourceConfig: dataSourceConfigFromConnector(connector), + folderId: makeSpaceInternalId(spaceId), + parents: [makeSpaceInternalId(spaceId)], + title: spaceName, + }); +} + export async function markPageHasVisited({ connectorId, pageId, @@ -814,6 +841,12 @@ export async function confluenceRemoveSpaceActivity( for (const page of allPages) { await deletePage(connectorId, page.pageId, dataSourceConfig); } + + // deleting the folder in data_sources_folders (core) + await deleteFolderNode({ + dataSourceConfig, + folderId: makeSpaceInternalId(spaceId), + }); } export async function fetchConfluenceSpaceIdsForConnectorActivity({ diff --git a/connectors/src/connectors/confluence/temporal/workflows.ts b/connectors/src/connectors/confluence/temporal/workflows.ts index 47b260815274..1aed30e58646 100644 --- a/connectors/src/connectors/confluence/temporal/workflows.ts +++ b/connectors/src/connectors/confluence/temporal/workflows.ts @@ -37,6 +37,7 @@ const { fetchConfluenceUserAccountAndConnectorIdsActivity, fetchConfluenceConfigurationActivity, + confluenceUpsertSpaceFolderActivity, getSpaceIdsToSyncActivity, } = proxyActivities({ startToCloseTimeout: "30 minutes", @@ -150,6 +151,12 @@ export async function confluenceSpaceSyncWorkflow( return startConfluenceRemoveSpaceWorkflow(wInfo, connectorId, spaceId); } + await confluenceUpsertSpaceFolderActivity({ + connectorId, + spaceId, + spaceName, + }); + // Get the root level pages for the space. const rootPageRefs = await confluenceGetRootPageRefsActivity({ connectorId,