Skip to content

Commit

Permalink
[connectors] Upsert and backfill Confluence spaces as `data_source_fo…
Browse files Browse the repository at this point in the history
…lders` (#9402)

* add two API calls to sync ConfluenceSpaces with data_source_folders

* fix a small oopsie in IDs

* add a backfill script

* add concurrent to the migration script

* move the calls to front -> core to activities
  • Loading branch information
aubin-tchoi authored Dec 16, 2024
1 parent e0cdf54 commit 143e777
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 1 deletion.
43 changes: 43 additions & 0 deletions connectors/migrations/20241216_backfill_confluence_folders.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { makeScript } from "scripts/helpers";

import { makeSpaceInternalId } from "@connectors/connectors/confluence/lib/internal_ids";
import { dataSourceConfigFromConnector } from "@connectors/lib/api/data_source_config";
import { concurrentExecutor } from "@connectors/lib/async_utils";
import { upsertFolderNode } from "@connectors/lib/data_sources";
import { ConfluenceSpace } from "@connectors/lib/models/confluence";
import { ConnectorResource } from "@connectors/resources/connector_resource";

const FOLDER_CONCURRENCY = 10;

makeScript({}, async ({ execute }, logger) => {
const connectors = await ConnectorResource.listByType("confluence", {});

for (const connector of connectors) {
const confluenceSpaces = await ConfluenceSpace.findAll({
attributes: ["spaceId", "name"],
where: { connectorId: connector.id },
});
const dataSourceConfig = dataSourceConfigFromConnector(connector);
if (execute) {
await concurrentExecutor(
confluenceSpaces,
async (space) => {
await upsertFolderNode({
dataSourceConfig,
folderId: makeSpaceInternalId(space.spaceId),
parents: [makeSpaceInternalId(space.spaceId)],
title: space.name,
});
},
{ concurrency: FOLDER_CONCURRENCY }
);
logger.info(
`Upserted ${confluenceSpaces.length} spaces for connector ${connector.id}`
);
} else {
logger.info(
`Found ${confluenceSpaces.length} spaces for connector ${connector.id}`
);
}
}
});
35 changes: 34 additions & 1 deletion connectors/src/connectors/confluence/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,21 @@ import {
getConfluencePageParentIds,
getSpaceHierarchy,
} from "@connectors/connectors/confluence/lib/hierarchy";
import { makePageInternalId } from "@connectors/connectors/confluence/lib/internal_ids";
import {
makePageInternalId,
makeSpaceInternalId,
} from "@connectors/connectors/confluence/lib/internal_ids";
import { makeConfluenceDocumentUrl } from "@connectors/connectors/confluence/temporal/workflow_ids";
import { dataSourceConfigFromConnector } from "@connectors/lib/api/data_source_config";
import { concurrentExecutor } from "@connectors/lib/async_utils";
import type { UpsertToDataSourceParams } from "@connectors/lib/data_sources";
import {
deleteFolderNode,
deleteFromDataSource,
renderDocumentTitleAndContent,
renderMarkdownSection,
updateDocumentParentsField,
upsertFolderNode,
upsertToDatasource,
} from "@connectors/lib/data_sources";
import {
Expand Down Expand Up @@ -196,6 +201,28 @@ export async function confluenceGetSpaceNameActivity({
}
}

/**
 * Upserts the folder node that represents a Confluence space in
 * data_source_folders (core).
 *
 * The folder id is the space's internal id, the parents list contains only
 * that same id, and the folder title is the space name.
 *
 * @param connectorId - Id of the Confluence connector owning the space.
 * @param spaceId - Confluence id of the space, converted with `makeSpaceInternalId`.
 * @param spaceName - Name of the space, used as the folder title.
 */
export async function confluenceUpsertSpaceFolderActivity({
  connectorId,
  spaceId,
  spaceName,
}: {
  connectorId: ModelId;
  spaceId: string;
  spaceName: string;
}) {
  const connector = await fetchConfluenceConnector(connectorId);
  const dataSourceConfig = dataSourceConfigFromConnector(connector);
  const spaceInternalId = makeSpaceInternalId(spaceId);

  await upsertFolderNode({
    dataSourceConfig,
    folderId: spaceInternalId,
    parents: [spaceInternalId],
    title: spaceName,
  });
}

export async function markPageHasVisited({
connectorId,
pageId,
Expand Down Expand Up @@ -814,6 +841,12 @@ export async function confluenceRemoveSpaceActivity(
for (const page of allPages) {
await deletePage(connectorId, page.pageId, dataSourceConfig);
}

// deleting the folder in data_source_folders (core)
await deleteFolderNode({
dataSourceConfig,
folderId: makeSpaceInternalId(spaceId),
});
}

export async function fetchConfluenceSpaceIdsForConnectorActivity({
Expand Down
7 changes: 7 additions & 0 deletions connectors/src/connectors/confluence/temporal/workflows.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const {
fetchConfluenceUserAccountAndConnectorIdsActivity,

fetchConfluenceConfigurationActivity,
confluenceUpsertSpaceFolderActivity,
getSpaceIdsToSyncActivity,
} = proxyActivities<typeof activities>({
startToCloseTimeout: "30 minutes",
Expand Down Expand Up @@ -150,6 +151,12 @@ export async function confluenceSpaceSyncWorkflow(
return startConfluenceRemoveSpaceWorkflow(wInfo, connectorId, spaceId);
}

await confluenceUpsertSpaceFolderActivity({
connectorId,
spaceId,
spaceName,
});

// Get the root level pages for the space.
const rootPageRefs = await confluenceGetRootPageRefsActivity({
connectorId,
Expand Down

0 comments on commit 143e777

Please sign in to comment.