Skip to content

Commit

Permalink
add parentId wherever missing
Browse files: browse the repository at this point in the history
  • Loading branch information
aubin-tchoi committed Dec 20, 2024
1 parent 21439c9 commit 7b3aeb2
Show file tree
Hide file tree
Showing 17 changed files with 87 additions and 50 deletions.
2 changes: 1 addition & 1 deletion connectors/src/connectors/confluence/lib/hierarchy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ export async function getConfluencePageParentIds(
connectorId: ModelId,
page: RawConfluencePage,
cachedHierarchy?: Record<string, string | null>
) {
): Promise<[string, ...string[], string]> {
const pageIdToParentIdMap =
cachedHierarchy ?? (await getSpaceHierarchy(connectorId, page.spaceId));

Expand Down
3 changes: 2 additions & 1 deletion connectors/src/connectors/confluence/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ export async function confluenceUpsertSpaceFolderActivity({
dataSourceConfig: dataSourceConfigFromConnector(connector),
folderId: makeSpaceInternalId(spaceId),
parents: [makeSpaceInternalId(spaceId)],
parentId: null,
title: spaceName,
mimeType: "application/vnd.dust.confluence.space",
});
Expand Down Expand Up @@ -252,7 +253,7 @@ export async function markPageHasVisited({
interface ConfluenceUpsertPageInput {
page: NonNullable<Awaited<ReturnType<ConfluenceClient["getPageById"]>>>;
spaceName: string;
parents: string[];
parents: [string, ...string[], string];
confluenceConfig: ConfluenceConfiguration;
syncType?: UpsertDataSourceDocumentParams["upsertContext"]["sync_type"];
dataSourceConfig: DataSourceConfig;
Expand Down
53 changes: 33 additions & 20 deletions connectors/src/connectors/github/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,11 @@ export async function githubUpsertIssueActivity(
tags.push(`author:${issueAuthor}`);
}

const parents: [string, string, string] = [
documentId,
getIssuesInternalId(repoId),
getRepositoryInternalId(repoId),
];
// TODO: last commenter, last comment date, issue labels (as tags)
await upsertDataSourceDocument({
dataSourceConfig,
Expand All @@ -296,11 +301,8 @@ export async function githubUpsertIssueActivity(
documentUrl: issue.url,
timestampMs: updatedAtTimestamp,
tags: tags,
parents: [
documentId,
getIssuesInternalId(repoId),
getRepositoryInternalId(repoId),
],
parents,
parentId: parents[1],
loggerArgs: logger.bindings(),
upsertContext: {
sync_type: isBatchSync ? "batch" : "incremental",
Expand Down Expand Up @@ -473,18 +475,20 @@ export async function githubUpsertDiscussionActivity(
`updatedAt:${new Date(discussion.updatedAt).getTime()}`,
];

const parents: [string, string, string] = [
documentId,
getDiscussionsInternalId(repoId),
getRepositoryInternalId(repoId),
];
await upsertDataSourceDocument({
dataSourceConfig,
documentId,
documentContent: renderedDiscussion,
documentUrl: discussion.url,
timestampMs: new Date(discussion.createdAt).getTime(),
tags,
parents: [
documentId,
getDiscussionsInternalId(repoId),
getRepositoryInternalId(repoId),
],
parents,
parentId: parents[1],
loggerArgs: logger.bindings(),
upsertContext: {
sync_type: isBatchSync ? "batch" : "incremental",
Expand Down Expand Up @@ -955,6 +959,7 @@ export async function githubCodeSyncActivity({
folderId: getCodeRootInternalId(repoId),
title: "Code",
parents: [getCodeRootInternalId(repoId), getRepositoryInternalId(repoId)],
parentId: getRepositoryInternalId(repoId),
mimeType: "application/vnd.dust.github.code.root",
});

Expand Down Expand Up @@ -1147,6 +1152,11 @@ export async function githubCodeSyncActivity({
`lasUpdatedAt:${codeSyncStartedAt.getTime()}`,
];

const parents: [...string[], string, string] = [
...f.parents,
rootInternalId,
getRepositoryInternalId(repoId),
];
// Time to upload the file to the data source.
await upsertDataSourceDocument({
dataSourceConfig,
Expand All @@ -1155,11 +1165,8 @@ export async function githubCodeSyncActivity({
documentUrl: f.sourceUrl,
timestampMs: codeSyncStartedAt.getTime(),
tags,
parents: [
...f.parents,
rootInternalId,
getRepositoryInternalId(repoId),
],
parents,
parentId: parents[1],
loggerArgs: logger.bindings(),
upsertContext: {
sync_type: isBatchSync ? "batch" : "incremental",
Expand Down Expand Up @@ -1198,14 +1205,16 @@ export async function githubCodeSyncActivity({
Context.current().heartbeat();
const parentInternalId = d.parentInternalId || rootInternalId;

const parents: [...string[], string, string] = [
...d.parents,
getCodeRootInternalId(repoId),
getRepositoryInternalId(repoId),
];
await upsertDataSourceFolder({
dataSourceConfig,
folderId: d.internalId,
parents: [
...d.parents,
getCodeRootInternalId(repoId),
getRepositoryInternalId(repoId),
],
parents,
parentId: parents[1],
title: d.dirName,
mimeType: "application/vnd.dust.github.code.directory",
});
Expand Down Expand Up @@ -1346,6 +1355,7 @@ export async function githubUpsertRepositoryFolderActivity({
folderId: getRepositoryInternalId(repoId),
title: repoName,
parents: [getRepositoryInternalId(repoId)],
parentId: null,
mimeType: "application/vnd.dust.github.repository",
});
}
Expand All @@ -1366,6 +1376,7 @@ export async function githubUpsertIssuesFolderActivity({
folderId: getIssuesInternalId(repoId),
title: "Issues",
parents: [getIssuesInternalId(repoId), getRepositoryInternalId(repoId)],
parentId: getRepositoryInternalId(repoId),
mimeType: "application/vnd.dust.github.issues",
});
}
Expand All @@ -1389,6 +1400,7 @@ export async function githubUpsertDiscussionsFolderActivity({
getDiscussionsInternalId(repoId),
getRepositoryInternalId(repoId),
],
parentId: getRepositoryInternalId(repoId),
mimeType: "application/vnd.dust.github.discussions",
});
}
Expand All @@ -1409,6 +1421,7 @@ export async function githubUpsertCodeRootFolderActivity({
folderId: getCodeRootInternalId(repoId),
title: "Code",
parents: [getCodeRootInternalId(repoId), getRepositoryInternalId(repoId)],
parentId: getRepositoryInternalId(repoId),
mimeType: "application/vnd.dust.github.code.root",
});
}
2 changes: 2 additions & 0 deletions connectors/src/connectors/google_drive/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,7 @@ export async function incrementalSync(
dataSourceConfig,
folderId: getInternalId(driveFile.id),
parents,
parentId: parents[1] || null,
title: driveFile.name ?? "",
mimeType: "application/vnd.dust.googledrive.folder",
});
Expand Down Expand Up @@ -856,6 +857,7 @@ export async function markFolderAsVisited(
dataSourceConfig,
folderId: getInternalId(file.id),
parents,
parentId: parents[1] || null,
title: file.name ?? "",
mimeType: "application/vnd.dust.googledrive.folder",
});
Expand Down
1 change: 1 addition & 0 deletions connectors/src/connectors/google_drive/temporal/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ async function upsertGdriveDocument(
timestampMs: file.updatedAtMs,
tags,
parents,
parentId: parents[1] || null,
upsertContext: {
sync_type: isBatchSync ? "batch" : "incremental",
},
Expand Down
19 changes: 9 additions & 10 deletions connectors/src/connectors/intercom/lib/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ export async function getParentIdsForArticle({
connectorId: number;
parentCollectionId: string;
helpCenterId: string;
}) {
}): Promise<[string, string, ...string[], string]> {
// Get collection parents
const collectionParents = await getParentIdsForCollection({
connectorId,
Expand All @@ -168,11 +168,8 @@ export async function getParentIdsForCollection({
connectorId: number;
collectionId: string;
helpCenterId: string;
}) {
// Initialize the internal IDs array with the collection ID.
const parentIds = [
getHelpCenterCollectionInternalId(connectorId, collectionId),
];
}): Promise<[string, ...string[], string]> {
const parentIds = [];

// Fetch and add any parent collection Ids.
let currentParentId = collectionId;
Expand All @@ -196,8 +193,10 @@ export async function getParentIdsForCollection({
);
}

// Add the help center internal ID.
parentIds.push(getHelpCenterInternalId(connectorId, helpCenterId));

return parentIds;
// Add the collection ID and the help center internal ID.
return [
getHelpCenterCollectionInternalId(connectorId, collectionId),
...parentIds,
getHelpCenterInternalId(connectorId, helpCenterId),
];
}
9 changes: 5 additions & 4 deletions connectors/src/connectors/intercom/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,12 @@ import {
import { dataSourceConfigFromConnector } from "@connectors/lib/api/data_source_config";
import { concurrentExecutor } from "@connectors/lib/async_utils";
import { upsertDataSourceFolder } from "@connectors/lib/data_sources";
import {
IntercomConversation,
IntercomWorkspace,
} from "@connectors/lib/models/intercom";
import {
IntercomCollection,
IntercomConversation,
IntercomHelpCenter,
IntercomTeam,
IntercomWorkspace,
} from "@connectors/lib/models/intercom";
import { syncStarted, syncSucceeded } from "@connectors/lib/sync_status";
import logger from "@connectors/logger/logger";
Expand Down Expand Up @@ -177,6 +175,7 @@ export async function syncHelpCenterOnlyActivity({
folderId: helpCenterInternalId,
title: helpCenterOnIntercom.display_name || "Help Center",
parents: [helpCenterInternalId],
parentId: null,
mimeType: getDataSourceNodeMimeType("HELP_CENTER"),
});

Expand Down Expand Up @@ -509,6 +508,7 @@ export async function syncTeamOnlyActivity({
folderId: teamInternalId,
title: teamOnIntercom.name,
parents: [teamInternalId, getTeamsInternalId(connectorId)],
parentId: getTeamsInternalId(connectorId),
mimeType: getDataSourceNodeMimeType("TEAM"),
});

Expand Down Expand Up @@ -743,6 +743,7 @@ export async function upsertIntercomTeamsFolderActivity({
folderId: getTeamsInternalId(connectorId),
title: "Conversations",
parents: [getTeamsInternalId(connectorId)],
parentId: null,
mimeType: getDataSourceNodeMimeType("CONVERSATIONS_FOLDER"),
});
}
13 changes: 8 additions & 5 deletions connectors/src/connectors/intercom/temporal/sync_conversation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -306,11 +306,13 @@ export async function syncConversation({
// Parents in the Core data source are the internal IDs used by the permission system.
// The list is self-referencing: it starts with the document's own ID.
const documentId = getConversationInternalId(connectorId, conversation.id);
const parents = [documentId];
if (conversationTeamId) {
parents.push(getTeamInternalId(connectorId, conversationTeamId));
}
parents.push(getTeamsInternalId(connectorId));
const parents: [string, ...string[], string] = [
documentId,
...(conversationTeamId
? [getTeamInternalId(connectorId, conversationTeamId)]
: []),
getTeamsInternalId(connectorId),
];

await upsertDataSourceDocument({
dataSourceConfig,
Expand All @@ -320,6 +322,7 @@ export async function syncConversation({
timestampMs: updatedAtDate.getTime(),
tags: datasourceTags,
parents,
parentId: parents[1],
loggerArgs: {
...loggerArgs,
conversationId: conversation.id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ export async function upsertCollectionWithChildren({
folderId: internalCollectionId,
title: collection.name,
parents: collectionParents,
parentId: collectionParents.length > 2 ? collectionParents[1] : null,
parentId: collectionParents[1],
mimeType: getDataSourceNodeMimeType("COLLECTION"),
});

Expand Down Expand Up @@ -420,6 +420,7 @@ export async function upsertArticle({
`updatedAt:${updatedAtDate.getTime()}`,
],
parents,
parentId: parents[1],
loggerArgs: {
...loggerArgs,
articleId: article.id,
Expand Down
3 changes: 3 additions & 0 deletions connectors/src/connectors/microsoft/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ export async function getRootNodesToSyncFromResources(
dataSourceConfig,
folderId: createdOrUpdatedResource.internalId,
parents: [createdOrUpdatedResource.internalId],
parentId: null,
title: createdOrUpdatedResource.name ?? "",
mimeType: "application/vnd.dust.microsoft.folder",
}),
Expand Down Expand Up @@ -477,6 +478,7 @@ export async function syncFiles({
dataSourceConfig,
folderId: createdOrUpdatedResource.internalId,
parents: [createdOrUpdatedResource.internalId, ...parents],
parentId: parents[0],
title: createdOrUpdatedResource.name ?? "",
mimeType: "application/vnd.dust.microsoft.folder",
}),
Expand Down Expand Up @@ -650,6 +652,7 @@ export async function syncDeltaForRootNodesInDrive({
dataSourceConfig,
folderId: blob.internalId,
parents: [blob.internalId],
parentId: null,
title: blob.name ?? "",
mimeType: "application/vnd.dust.microsoft.folder",
});
Expand Down
3 changes: 2 additions & 1 deletion connectors/src/connectors/microsoft/temporal/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ export async function syncOneFile({
timestampMs: upsertTimestampMs,
tags,
parents,
parentId: parents[1] || null,
upsertContext: {
sync_type: isBatchSync ? "batch" : "incremental",
},
Expand Down Expand Up @@ -352,7 +353,7 @@ export async function getParents({
connectorId: ModelId;
internalId: string;
startSyncTs: number;
}): Promise<string[]> {
}): Promise<[string, ...string[]]> {
const parentInternalId = await getParentId(
connectorId,
internalId,
Expand Down
12 changes: 7 additions & 5 deletions connectors/src/connectors/webcrawler/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -279,15 +279,16 @@ export async function crawlWebsiteByConnectorId(connectorId: ModelId) {
lastSeenAt: new Date(),
});

// Parent folder IDs of the page are in hierarchy order, from the page to the
// root. For the current folder, its parents therefore start at index + 1
// (including itself as the first parent) and end at the root.
const parents = parentFolderIds.slice(index + 1);
await upsertDataSourceFolder({
dataSourceConfig,
folderId: webCrawlerFolder.internalId,
timestampMs: webCrawlerFolder.updatedAt.getTime(),

// parent folder ids of the page are in hierarchy order from the
// page to the root so for the current folder, its parents start at
// index+1 (including itself as first parent) and end at the root
parents: parentFolderIds.slice(index + 1),
parents,
parentId: parents[1] || null,
title: folder,
mimeType: "application/vnd.dust.webcrawler.folder",
});
Expand Down Expand Up @@ -363,6 +364,7 @@ export async function crawlWebsiteByConnectorId(connectorId: ModelId) {
timestampMs: new Date().getTime(),
tags: [`title:${stripNullBytes(pageTitle)}`],
parents: parentFolderIds,
parentId: parentFolderIds[1] || null,
upsertContext: {
sync_type: "batch",
},
Expand Down
Loading

0 comments on commit 7b3aeb2

Please sign in to comment.