diff --git a/connectors/src/connectors/confluence/lib/hierarchy.ts b/connectors/src/connectors/confluence/lib/hierarchy.ts index 53912b0d42aa..d0c986c4f52b 100644 --- a/connectors/src/connectors/confluence/lib/hierarchy.ts +++ b/connectors/src/connectors/confluence/lib/hierarchy.ts @@ -41,7 +41,7 @@ export async function getConfluencePageParentIds( connectorId: ModelId, page: RawConfluencePage, cachedHierarchy?: Record -) { +): Promise<[string, ...string[], string]> { const pageIdToParentIdMap = cachedHierarchy ?? (await getSpaceHierarchy(connectorId, page.spaceId)); diff --git a/connectors/src/connectors/confluence/temporal/activities.ts b/connectors/src/connectors/confluence/temporal/activities.ts index 0e6b09698093..d6d862a154ae 100644 --- a/connectors/src/connectors/confluence/temporal/activities.ts +++ b/connectors/src/connectors/confluence/temporal/activities.ts @@ -219,6 +219,7 @@ export async function confluenceUpsertSpaceFolderActivity({ dataSourceConfig: dataSourceConfigFromConnector(connector), folderId: makeSpaceInternalId(spaceId), parents: [makeSpaceInternalId(spaceId)], + parentId: null, title: spaceName, mimeType: "application/vnd.dust.confluence.space", }); @@ -252,7 +253,7 @@ export async function markPageHasVisited({ interface ConfluenceUpsertPageInput { page: NonNullable>>; spaceName: string; - parents: string[]; + parents: [string, ...string[], string]; confluenceConfig: ConfluenceConfiguration; syncType?: UpsertDataSourceDocumentParams["upsertContext"]["sync_type"]; dataSourceConfig: DataSourceConfig; diff --git a/connectors/src/connectors/github/temporal/activities.ts b/connectors/src/connectors/github/temporal/activities.ts index 1e297105fe21..487cb08a7eb8 100644 --- a/connectors/src/connectors/github/temporal/activities.ts +++ b/connectors/src/connectors/github/temporal/activities.ts @@ -288,6 +288,11 @@ export async function githubUpsertIssueActivity( tags.push(`author:${issueAuthor}`); } + const parents: [string, string, string] = [ + documentId, + getIssuesInternalId(repoId), + getRepositoryInternalId(repoId), + ]; // TODO: last commentor, last comment date, issue labels (as tags) await upsertDataSourceDocument({ dataSourceConfig, @@ -296,11 +301,8 @@ export async function githubUpsertIssueActivity( documentUrl: issue.url, timestampMs: updatedAtTimestamp, tags: tags, - parents: [ - documentId, - getIssuesInternalId(repoId), - getRepositoryInternalId(repoId), - ], + parents, + parentId: parents[1], loggerArgs: logger.bindings(), upsertContext: { sync_type: isBatchSync ? "batch" : "incremental", @@ -473,6 +475,11 @@ export async function githubUpsertDiscussionActivity( `updatedAt:${new Date(discussion.updatedAt).getTime()}`, ]; + const parents: [string, string, string] = [ + documentId, + getDiscussionsInternalId(repoId), + getRepositoryInternalId(repoId), + ]; await upsertDataSourceDocument({ dataSourceConfig, documentId, @@ -480,11 +487,8 @@ export async function githubUpsertDiscussionActivity( documentUrl: discussion.url, timestampMs: new Date(discussion.createdAt).getTime(), tags, - parents: [ - documentId, - getDiscussionsInternalId(repoId), - getRepositoryInternalId(repoId), - ], + parents, + parentId: parents[1], loggerArgs: logger.bindings(), upsertContext: { sync_type: isBatchSync ? "batch" : "incremental", @@ -955,6 +959,7 @@ export async function githubCodeSyncActivity({ folderId: getCodeRootInternalId(repoId), title: "Code", parents: [getCodeRootInternalId(repoId), getRepositoryInternalId(repoId)], + parentId: getRepositoryInternalId(repoId), mimeType: "application/vnd.dust.github.code.root", }); @@ -1147,6 +1152,11 @@ export async function githubCodeSyncActivity({ `lasUpdatedAt:${codeSyncStartedAt.getTime()}`, ]; + const parents: [...string[], string, string] = [ + ...f.parents, + rootInternalId, + getRepositoryInternalId(repoId), + ]; // Time to upload the file to the data source. await upsertDataSourceDocument({ dataSourceConfig, @@ -1155,11 +1165,8 @@ export async function githubCodeSyncActivity({ documentUrl: f.sourceUrl, timestampMs: codeSyncStartedAt.getTime(), tags, - parents: [ - ...f.parents, - rootInternalId, - getRepositoryInternalId(repoId), - ], + parents, + parentId: parents[1], loggerArgs: logger.bindings(), upsertContext: { sync_type: isBatchSync ? "batch" : "incremental", @@ -1198,14 +1205,16 @@ export async function githubCodeSyncActivity({ Context.current().heartbeat(); const parentInternalId = d.parentInternalId || rootInternalId; + const parents: [...string[], string, string] = [ + ...d.parents, + getCodeRootInternalId(repoId), + getRepositoryInternalId(repoId), + ]; await upsertDataSourceFolder({ dataSourceConfig, folderId: d.internalId, - parents: [ - ...d.parents, - getCodeRootInternalId(repoId), - getRepositoryInternalId(repoId), - ], + parents, + parentId: parents[1], title: d.dirName, mimeType: "application/vnd.dust.github.code.directory", }); @@ -1346,6 +1355,7 @@ export async function githubUpsertRepositoryFolderActivity({ folderId: getRepositoryInternalId(repoId), title: repoName, parents: [getRepositoryInternalId(repoId)], + parentId: null, mimeType: "application/vnd.dust.github.repository", }); } @@ -1366,6 +1376,7 @@ export async function githubUpsertIssuesFolderActivity({ folderId: getIssuesInternalId(repoId), title: "Issues", parents: [getIssuesInternalId(repoId), getRepositoryInternalId(repoId)], + parentId: getRepositoryInternalId(repoId), mimeType: "application/vnd.dust.github.issues", }); } @@ -1389,6 +1400,7 @@ export async function githubUpsertDiscussionsFolderActivity({ getDiscussionsInternalId(repoId), getRepositoryInternalId(repoId), ], + parentId: getRepositoryInternalId(repoId), mimeType: "application/vnd.dust.github.discussions", }); } @@ -1409,6 +1421,7 @@ export async function githubUpsertCodeRootFolderActivity({ folderId: getCodeRootInternalId(repoId), title: "Code", parents: [getCodeRootInternalId(repoId), getRepositoryInternalId(repoId)], + parentId: getRepositoryInternalId(repoId), mimeType: "application/vnd.dust.github.code.root", }); } diff --git a/connectors/src/connectors/google_drive/temporal/activities.ts b/connectors/src/connectors/google_drive/temporal/activities.ts index 45566089dc52..65e1a2859835 100644 --- a/connectors/src/connectors/google_drive/temporal/activities.ts +++ b/connectors/src/connectors/google_drive/temporal/activities.ts @@ -512,6 +512,7 @@ export async function incrementalSync( dataSourceConfig, folderId: getInternalId(driveFile.id), parents, + parentId: parents[1] || null, title: driveFile.name ?? "", mimeType: "application/vnd.dust.googledrive.folder", }); @@ -856,6 +857,7 @@ export async function markFolderAsVisited( dataSourceConfig, folderId: getInternalId(file.id), parents, + parentId: parents[1] || null, title: file.name ?? "", mimeType: "application/vnd.dust.googledrive.folder", }); diff --git a/connectors/src/connectors/google_drive/temporal/file.ts b/connectors/src/connectors/google_drive/temporal/file.ts index eb327593e00f..2f5d918b4fcc 100644 --- a/connectors/src/connectors/google_drive/temporal/file.ts +++ b/connectors/src/connectors/google_drive/temporal/file.ts @@ -492,6 +492,7 @@ async function upsertGdriveDocument( timestampMs: file.updatedAtMs, tags, parents, + parentId: parents[1] || null, upsertContext: { sync_type: isBatchSync ? "batch" : "incremental", }, diff --git a/connectors/src/connectors/intercom/lib/utils.ts b/connectors/src/connectors/intercom/lib/utils.ts index 2db0c96a85ad..a763145be6e5 100644 --- a/connectors/src/connectors/intercom/lib/utils.ts +++ b/connectors/src/connectors/intercom/lib/utils.ts @@ -149,7 +149,7 @@ export async function getParentIdsForArticle({ connectorId: number; parentCollectionId: string; helpCenterId: string; -}) { +}): Promise<[string, string, ...string[], string]> { // Get collection parents const collectionParents = await getParentIdsForCollection({ connectorId, @@ -168,11 +168,8 @@ export async function getParentIdsForCollection({ connectorId: number; collectionId: string; helpCenterId: string; -}) { - // Initialize the internal IDs array with the collection ID. - const parentIds = [ - getHelpCenterCollectionInternalId(connectorId, collectionId), - ]; +}): Promise<[string, ...string[], string]> { + const parentIds = []; // Fetch and add any parent collection Ids. let currentParentId = collectionId; @@ -196,8 +193,10 @@ export async function getParentIdsForCollection({ ); } - // Add the help center internal ID. - parentIds.push(getHelpCenterInternalId(connectorId, helpCenterId)); - - return parentIds; + // Add the collection ID and the help center internal ID. + return [ + getHelpCenterCollectionInternalId(connectorId, collectionId), + ...parentIds, + getHelpCenterInternalId(connectorId, helpCenterId), + ]; } diff --git a/connectors/src/connectors/intercom/temporal/activities.ts b/connectors/src/connectors/intercom/temporal/activities.ts index c1b27952385b..de1b06a85ed0 100644 --- a/connectors/src/connectors/intercom/temporal/activities.ts +++ b/connectors/src/connectors/intercom/temporal/activities.ts @@ -30,14 +30,12 @@ import { import { dataSourceConfigFromConnector } from "@connectors/lib/api/data_source_config"; import { concurrentExecutor } from "@connectors/lib/async_utils"; import { upsertDataSourceFolder } from "@connectors/lib/data_sources"; -import { - IntercomConversation, - IntercomWorkspace, -} from "@connectors/lib/models/intercom"; import { IntercomCollection, + IntercomConversation, IntercomHelpCenter, IntercomTeam, + IntercomWorkspace, } from "@connectors/lib/models/intercom"; import { syncStarted, syncSucceeded } from "@connectors/lib/sync_status"; import logger from "@connectors/logger/logger"; @@ -177,6 +175,7 @@ export async function syncHelpCenterOnlyActivity({ folderId: helpCenterInternalId, title: helpCenterOnIntercom.display_name || "Help Center", parents: [helpCenterInternalId], + parentId: null, mimeType: getDataSourceNodeMimeType("HELP_CENTER"), }); @@ -509,6 +508,7 @@ export async function syncTeamOnlyActivity({ folderId: teamInternalId, title: teamOnIntercom.name, parents: [teamInternalId, getTeamsInternalId(connectorId)], + parentId: getTeamsInternalId(connectorId), mimeType: getDataSourceNodeMimeType("TEAM"), }); @@ -743,6 +743,7 @@ export async function upsertIntercomTeamsFolderActivity({ folderId: getTeamsInternalId(connectorId), title: "Conversations", parents: [getTeamsInternalId(connectorId)], + parentId: null, mimeType: getDataSourceNodeMimeType("CONVERSATIONS_FOLDER"), }); } diff --git a/connectors/src/connectors/intercom/temporal/sync_conversation.ts b/connectors/src/connectors/intercom/temporal/sync_conversation.ts index b68b03f9b6ac..56b00b70447a 100644 --- a/connectors/src/connectors/intercom/temporal/sync_conversation.ts +++ b/connectors/src/connectors/intercom/temporal/sync_conversation.ts @@ -306,11 +306,13 @@ export async function syncConversation({ // parents in the Core datasource map the internal ids that are used in the permission system // they self reference the document id const documentId = getConversationInternalId(connectorId, conversation.id); - const parents = [documentId]; - if (conversationTeamId) { - parents.push(getTeamInternalId(connectorId, conversationTeamId)); - } - parents.push(getTeamsInternalId(connectorId)); + const parents: [string, ...string[], string] = [ + documentId, + ...(conversationTeamId + ? [getTeamInternalId(connectorId, conversationTeamId)] + : []), + getTeamsInternalId(connectorId), + ]; await upsertDataSourceDocument({ dataSourceConfig, @@ -320,6 +322,7 @@ export async function syncConversation({ timestampMs: updatedAtDate.getTime(), tags: datasourceTags, parents, + parentId: parents[1], loggerArgs: { ...loggerArgs, conversationId: conversation.id, diff --git a/connectors/src/connectors/intercom/temporal/sync_help_center.ts b/connectors/src/connectors/intercom/temporal/sync_help_center.ts index db50d4e0ca49..8aa9d21dd94c 100644 --- a/connectors/src/connectors/intercom/temporal/sync_help_center.ts +++ b/connectors/src/connectors/intercom/temporal/sync_help_center.ts @@ -228,7 +228,7 @@ export async function upsertCollectionWithChildren({ folderId: internalCollectionId, title: collection.name, parents: collectionParents, - parentId: collectionParents.length > 2 ? collectionParents[1] : null, + parentId: collectionParents[1], mimeType: getDataSourceNodeMimeType("COLLECTION"), }); @@ -420,6 +420,7 @@ export async function upsertArticle({ `updatedAt:${updatedAtDate.getTime()}`, ], parents, + parentId: parents[1], loggerArgs: { ...loggerArgs, articleId: article.id, diff --git a/connectors/src/connectors/microsoft/temporal/activities.ts b/connectors/src/connectors/microsoft/temporal/activities.ts index 9684629da143..95ce21e40c28 100644 --- a/connectors/src/connectors/microsoft/temporal/activities.ts +++ b/connectors/src/connectors/microsoft/temporal/activities.ts @@ -206,6 +206,7 @@ export async function getRootNodesToSyncFromResources( dataSourceConfig, folderId: createdOrUpdatedResource.internalId, parents: [createdOrUpdatedResource.internalId], + parentId: null, title: createdOrUpdatedResource.name ?? "", mimeType: "application/vnd.dust.microsoft.folder", }), @@ -477,6 +478,7 @@ export async function syncFiles({ dataSourceConfig, folderId: createdOrUpdatedResource.internalId, parents: [createdOrUpdatedResource.internalId, ...parents], + parentId: parents[0], title: createdOrUpdatedResource.name ?? "", mimeType: "application/vnd.dust.microsoft.folder", }), @@ -650,6 +652,7 @@ export async function syncDeltaForRootNodesInDrive({ dataSourceConfig, folderId: blob.internalId, parents: [blob.internalId], + parentId: null, title: blob.name ?? "", mimeType: "application/vnd.dust.microsoft.folder", }); diff --git a/connectors/src/connectors/microsoft/temporal/file.ts b/connectors/src/connectors/microsoft/temporal/file.ts index 1eb3ee24636c..2543c013f8d8 100644 --- a/connectors/src/connectors/microsoft/temporal/file.ts +++ b/connectors/src/connectors/microsoft/temporal/file.ts @@ -306,6 +306,7 @@ export async function syncOneFile({ timestampMs: upsertTimestampMs, tags, parents, + parentId: parents[1] || null, upsertContext: { sync_type: isBatchSync ? "batch" : "incremental", }, @@ -352,7 +353,7 @@ export async function getParents({ connectorId: ModelId; internalId: string; startSyncTs: number; -}): Promise { +}): Promise<[string, ...string[]]> { const parentInternalId = await getParentId( connectorId, internalId, diff --git a/connectors/src/connectors/webcrawler/temporal/activities.ts b/connectors/src/connectors/webcrawler/temporal/activities.ts index cea57969e776..b27200375a3d 100644 --- a/connectors/src/connectors/webcrawler/temporal/activities.ts +++ b/connectors/src/connectors/webcrawler/temporal/activities.ts @@ -279,15 +279,16 @@ export async function crawlWebsiteByConnectorId(connectorId: ModelId) { lastSeenAt: new Date(), }); + // parent folder ids of the page are in hierarchy order from the + // page to the root so for the current folder, its parents start at + // index+1 (including itself as first parent) and end at the root + const parents = parentFolderIds.slice(index + 1); await upsertDataSourceFolder({ dataSourceConfig, folderId: webCrawlerFolder.internalId, timestampMs: webCrawlerFolder.updatedAt.getTime(), - - // parent folder ids of the page are in hierarchy order from the - // page to the root so for the current folder, its parents start at - // index+1 (including itself as first parent) and end at the root - parents: parentFolderIds.slice(index + 1), + parents, + parentId: parents[1] || null, title: folder, mimeType: "application/vnd.dust.webcrawler.folder", }); @@ -363,6 +364,7 @@ export async function crawlWebsiteByConnectorId(connectorId: ModelId) { timestampMs: new Date().getTime(), tags: [`title:${stripNullBytes(pageTitle)}`], parents: parentFolderIds, + parentId: parentFolderIds[1] || null, upsertContext: { sync_type: "batch", }, diff --git a/connectors/src/connectors/zendesk/lib/sync_article.ts b/connectors/src/connectors/zendesk/lib/sync_article.ts index 79770c7cd7ee..ed3d7ddabbf6 100644 --- a/connectors/src/connectors/zendesk/lib/sync_article.ts +++ b/connectors/src/connectors/zendesk/lib/sync_article.ts @@ -151,6 +151,7 @@ export async function syncArticle({ articleId: article.id, }); + const parents = articleInDb.getParentInternalIds(connectorId); await upsertDataSourceDocument({ dataSourceConfig, documentId, @@ -162,7 +163,8 @@ export async function syncArticle({ `createdAt:${createdAt.getTime()}`, `updatedAt:${updatedAt.getTime()}`, ], - parents: articleInDb.getParentInternalIds(connectorId), + parents, + parentId: parents[1], loggerArgs: { ...loggerArgs, articleId: article.id }, upsertContext: { sync_type: "batch" }, title: article.title, diff --git a/connectors/src/connectors/zendesk/lib/sync_category.ts b/connectors/src/connectors/zendesk/lib/sync_category.ts index ed27450f20d6..55f9b23e082b 100644 --- a/connectors/src/connectors/zendesk/lib/sync_category.ts +++ b/connectors/src/connectors/zendesk/lib/sync_category.ts @@ -104,6 +104,7 @@ export async function syncCategory({ dataSourceConfig, folderId: parents[0], parents, + parentId: parents[1], title: categoryInDb.name, mimeType: "application/vnd.dust.zendesk.category", }); diff --git a/connectors/src/connectors/zendesk/lib/sync_ticket.ts b/connectors/src/connectors/zendesk/lib/sync_ticket.ts index 2d5dc17d33c6..b626b53fd5c6 100644 --- a/connectors/src/connectors/zendesk/lib/sync_ticket.ts +++ b/connectors/src/connectors/zendesk/lib/sync_ticket.ts @@ -209,6 +209,7 @@ ${comments ticketId: ticket.id, }); + const parents = ticketInDb.getParentInternalIds(connectorId); await upsertDataSourceDocument({ dataSourceConfig, documentId, @@ -221,7 +222,8 @@ ${comments `updatedAt:${updatedAtDate.getTime()}`, `createdAt:${createdAtDate.getTime()}`, ], - parents: ticketInDb.getParentInternalIds(connectorId), + parents, + parentId: parents[1], loggerArgs: { ...loggerArgs, ticketId: ticket.id }, upsertContext: { sync_type: "batch" }, title: ticket.subject, diff --git a/connectors/src/connectors/zendesk/temporal/activities.ts b/connectors/src/connectors/zendesk/temporal/activities.ts index 1481b3cde5a0..2df641d54c5d 100644 --- a/connectors/src/connectors/zendesk/temporal/activities.ts +++ b/connectors/src/connectors/zendesk/temporal/activities.ts @@ -132,6 +132,7 @@ export async function syncZendeskBrandActivity({ dataSourceConfig, folderId: brandInternalId, parents: [brandInternalId], + parentId: null, title: brandInDb.name, mimeType: "application/vnd.dust.zendesk.brand", }); @@ -142,6 +143,7 @@ export async function syncZendeskBrandActivity({ dataSourceConfig, folderId: helpCenterNode.internalId, parents: [helpCenterNode.internalId, helpCenterNode.parentInternalId], + parentId: helpCenterNode.parentInternalId, title: helpCenterNode.title, mimeType: "application/vnd.dust.zendesk.helpcenter", }); @@ -152,6 +154,7 @@ export async function syncZendeskBrandActivity({ dataSourceConfig, folderId: ticketsNode.internalId, parents: [ticketsNode.internalId, ticketsNode.parentInternalId], + parentId: ticketsNode.parentInternalId, title: ticketsNode.title, mimeType: "application/vnd.dust.zendesk.tickets", }); @@ -326,6 +329,7 @@ export async function syncZendeskCategoryActivity({ dataSourceConfig: dataSourceConfigFromConnector(connector), folderId: parents[0], parents, + parentId: parents[1], title: categoryInDb.name, mimeType: "application/vnd.dust.zendesk.category", }); diff --git a/connectors/src/connectors/zendesk/temporal/incremental_activities.ts b/connectors/src/connectors/zendesk/temporal/incremental_activities.ts index 08fd18c484d9..2522aa32c3c6 100644 --- a/connectors/src/connectors/zendesk/temporal/incremental_activities.ts +++ b/connectors/src/connectors/zendesk/temporal/incremental_activities.ts @@ -147,6 +147,7 @@ export async function syncZendeskArticleUpdateBatchActivity({ dataSourceConfig, folderId: parents[0], parents, + parentId: parents[1], title: category.name, mimeType: "application/vnd.dust.zendesk.category", });