From 54080ea16ad99b851324a10977db73c6ae8e8052 Mon Sep 17 00:00:00 2001 From: Stanislas Polu Date: Tue, 2 Jan 2024 17:24:48 +0100 Subject: [PATCH] GitHub code support: Move issues/discussions to separate parents in github connectors (#3032) * Add issues/discussions parents at aggregation and permission retrieval * make github nested, remove default prefix for channels * re-add # to channel title for slack connector permissions * dry * add migration to add parents for github issues and discussions * lint --- ...2_github_add_issues_discussions_parents.ts | 99 ++++++++++++++ connectors/src/connectors/github/index.ts | 123 +++++++++++++----- .../src/connectors/github/lib/github_api.ts | 27 +++- .../connectors/github/temporal/activities.ts | 10 +- connectors/src/connectors/slack/index.ts | 2 +- connectors/src/lib/models/github.ts | 2 +- front/components/ConnectorPermissionsTree.tsx | 3 +- .../DataSourceResourceSelectorTree.tsx | 5 +- front/lib/connector_providers.ts | 2 +- 9 files changed, 227 insertions(+), 46 deletions(-) create mode 100644 connectors/migrations/20240102_github_add_issues_discussions_parents.ts diff --git a/connectors/migrations/20240102_github_add_issues_discussions_parents.ts b/connectors/migrations/20240102_github_add_issues_discussions_parents.ts new file mode 100644 index 000000000000..cb70761135f5 --- /dev/null +++ b/connectors/migrations/20240102_github_add_issues_discussions_parents.ts @@ -0,0 +1,99 @@ +import { + getDiscussionDocumentId, + getIssueDocumentId, +} from "@connectors/connectors/github/temporal/activities"; +import { updateDocumentParentsField } from "@connectors/lib/data_sources"; +import { Connector } from "@connectors/lib/models"; +import { GithubDiscussion, GithubIssue } from "@connectors/lib/models/github"; + +const { LIVE = null } = process.env; + +async function main() { + const connectors = await Connector.findAll({ + where: { + type: "github", + }, + }); + + for (const connector of connectors) { + console.log(`>> Updating connector: ${connector.id}`); + await updateParents(connector); + } +} + +const CHUNK_SIZE = 32; + +async function updateParents(connector: Connector) { + const discussions = await GithubDiscussion.findAll({ + where: { + connectorId: connector.id, + }, + }); + + const discussionChunks = []; + for (let i = 0; i < discussions.length; i += CHUNK_SIZE) { + discussionChunks.push(discussions.slice(i, i + CHUNK_SIZE)); + } + + for (const chunk of discussionChunks) { + await Promise.all( + chunk.map(async (d) => { + const documentId = getDiscussionDocumentId( + d.repoId, + d.discussionNumber + ); + const parents = [documentId, `${d.repoId}-discussions`, d.repoId]; + if (LIVE) { + await updateDocumentParentsField({ + dataSourceConfig: connector, + documentId, + parents, + }); + console.log(`Updated discussion ${documentId} with: ${parents}`); + } else { + console.log(`Would update ${documentId} with: ${parents}`); + } + }) + ); + } + + const issues = await GithubIssue.findAll({ + where: { + connectorId: connector.id, + }, + }); + + const issueChunks = []; + for (let i = 0; i < issues.length; i += CHUNK_SIZE) { + issueChunks.push(issues.slice(i, i + CHUNK_SIZE)); + } + + for (const chunk of issueChunks) { + await Promise.all( + chunk.map(async (i) => { + const documentId = getIssueDocumentId(i.repoId, i.issueNumber); + const parents = [documentId, `${i.repoId}-issues`, i.repoId]; + if (LIVE) { + await updateDocumentParentsField({ + dataSourceConfig: connector, + documentId, + parents, + }); + console.log(`Updated issue ${documentId} with: ${parents}`); + } else { + console.log(`Would update ${documentId} with: ${parents}`); + } + }) + ); + } +} + +main() + .then(() => { + console.log("Done"); + process.exit(0); + }) + .catch((err) => { + console.error(err); + process.exit(1); + }); diff --git a/connectors/src/connectors/github/index.ts b/connectors/src/connectors/github/index.ts index e47bea557bca..e307c2746262 100644 --- a/connectors/src/connectors/github/index.ts +++ b/connectors/src/connectors/github/index.ts @@ -1,6 +1,7 @@ import { ModelId } from "@dust-tt/types"; import { + getRepo, getReposPage, validateInstallationId, } from "@connectors/connectors/github/lib/github_api"; @@ -8,6 +9,7 @@ import { launchGithubFullSyncWorkflow } from "@connectors/connectors/github/temp import { Connector, sequelize_conn } from "@connectors/lib/models"; import { GithubConnectorState, + GithubDiscussion, GithubIssue, } from "@connectors/lib/models/github"; import { Err, Ok, Result } from "@connectors/lib/result"; @@ -252,14 +254,6 @@ export async function retrieveGithubConnectorPermissions({ }: Parameters[0]): Promise< Result > { - if (parentInternalId) { - return new Err( - new Error( - "Github connector does not support permission retrieval with `parentInternalId`" - ) - ); - } - const c = await Connector.findOne({ where: { id: connectorId, @@ -272,36 +266,105 @@ export async function retrieveGithubConnectorPermissions({ const githubInstallationId = c.connectionId; - let resources: ConnectorResource[] = []; - let pageNumber = 1; // 1-indexed - for (;;) { - const page = await getReposPage(githubInstallationId, pageNumber); - pageNumber += 1; - if (page.length === 0) { - break; + if (!parentInternalId) { + // No parentInternalId: we return the repositories. + + let resources: ConnectorResource[] = []; + let pageNumber = 1; // 1-indexed + for (;;) { + const page = await getReposPage(githubInstallationId, pageNumber); + pageNumber += 1; + if (page.length === 0) { + break; + } + + resources = resources.concat( + page.map((repo) => ({ + provider: c.type, + internalId: repo.id.toString(), + parentInternalId: null, + type: "folder", + title: repo.name, + sourceUrl: repo.url, + expandable: true, + permission: "read" as ConnectorPermission, + dustDocumentId: null, + lastUpdatedAt: null, + })) + ); + } + + resources.sort((a, b) => { + return a.title.localeCompare(b.title); + }); + + return new Ok(resources); + } else { + // If parentInternalId is set this means we are fetching the children of a repository. For now + // we only support issues and discussions. + const repoId = parseInt(parentInternalId, 10); + if (isNaN(repoId)) { + return new Err(new Error(`Invalid repoId: ${parentInternalId}`)); } - resources = resources.concat( - page.map((repo) => ({ + const [latestDiscussion, latestIssue, repo] = await Promise.all([ + (async () => { + return await GithubDiscussion.findOne({ + where: { + connectorId: c.id, + repoId: repoId.toString(), + }, + limit: 1, + order: [["updatedAt", "DESC"]], + }); + })(), + (async () => { + return await GithubIssue.findOne({ + where: { + connectorId: c.id, + repoId: repoId.toString(), + }, + limit: 1, + order: [["updatedAt", "DESC"]], + }); + })(), + getRepo(githubInstallationId, repoId), + ]); + + const resources: ConnectorResource[] = []; + + if (latestIssue) { + resources.push({ provider: c.type, - internalId: repo.id.toString(), - parentInternalId: null, - type: "folder", - title: repo.name, - sourceUrl: repo.url, + internalId: `${repoId}-issues`, + parentInternalId, + type: "database", + title: "Issues", + sourceUrl: repo.url + "/issues", expandable: false, permission: "read" as ConnectorPermission, dustDocumentId: null, - lastUpdatedAt: null, - })) - ); - } + lastUpdatedAt: latestIssue.updatedAt.getTime(), + }); + } - resources.sort((a, b) => { - return a.title.localeCompare(b.title); - }); + if (latestDiscussion) { + resources.push({ + provider: c.type, + internalId: `${repoId}-discussions`, + parentInternalId, + type: "channel", + title: "Discussions", + sourceUrl: repo.url + "/discussions", + expandable: false, + permission: "read" as ConnectorPermission, + dustDocumentId: null, + lastUpdatedAt: latestDiscussion.updatedAt.getTime(), + }); + } - return new Ok(resources); + return new Ok(resources); + } } export async function retrieveGithubReposTitles( diff --git a/connectors/src/connectors/github/lib/github_api.ts b/connectors/src/connectors/github/lib/github_api.ts index 5174b8f5548c..b78edbc64671 100644 --- a/connectors/src/connectors/github/lib/github_api.ts +++ b/connectors/src/connectors/github/lib/github_api.ts @@ -118,6 +118,31 @@ export async function getReposPage( })); } +export async function getRepo( + installationId: string, + repoId: number +): Promise { + const octokit = await getOctokit(installationId); + + const { data: r } = await octokit.request(`GET /repositories/:repo_id`, { + repo_id: repoId, + }); + + return { + id: r.id, + name: r.name, + private: r.private, + url: r.html_url, + createdAt: r.created_at ? new Date(r.created_at) : null, + updatedAt: r.updated_at ? new Date(r.updated_at) : null, + description: r.description, + owner: { + id: r.owner.id, + login: r.owner.login, + }, + }; +} + export async function getRepoIssuesPage( installationId: string, repoName: string, @@ -326,7 +351,7 @@ export async function getDiscussionCommentsPage( } } } - } + } `, { owner: login, diff --git a/connectors/src/connectors/github/temporal/activities.ts b/connectors/src/connectors/github/temporal/activities.ts index 9d16645343f2..06e3599b33f8 100644 --- a/connectors/src/connectors/github/temporal/activities.ts +++ b/connectors/src/connectors/github/temporal/activities.ts @@ -219,10 +219,7 @@ export async function githubUpsertIssueActivity( // Therefore as a special case we use getIssueDocumentId() to get a parent string // The repo id from github is globally unique so used as-is, as per // convention to use the external id string. - parents: [ - getIssueDocumentId(repoId.toString(), issue.number), - repoId.toString(), - ], + parents: [documentId, `${repoId}-issues`, repoId.toString()], retries: 3, delayBetweenRetriesMs: 500, loggerArgs: { ...loggerArgs, provider: "github" }, @@ -397,10 +394,7 @@ export async function githubUpsertDiscussionActivity( // as a special case we use getDiscussionDocumentId() to get a parent string // The repo id from github is globally unique so used as-is, as per // convention to use the external id string. - parents: [ - getDiscussionDocumentId(repoId.toString(), discussionNumber), - repoId.toString(), - ], + parents: [documentId, `${repoId}-discussions`, repoId.toString()], retries: 3, delayBetweenRetriesMs: 500, loggerArgs: { ...loggerArgs, provider: "github" }, diff --git a/connectors/src/connectors/slack/index.ts b/connectors/src/connectors/slack/index.ts index fffad4c2b428..ddb78dee3e4e 100644 --- a/connectors/src/connectors/slack/index.ts +++ b/connectors/src/connectors/slack/index.ts @@ -483,7 +483,7 @@ export async function retrieveSlackConnectorPermissions({ internalId: ch.slackChannelId, parentInternalId: null, type: "channel", - title: ch.slackChannelName, + title: `#${ch.slackChannelName}`, sourceUrl: `https://app.slack.com/client/${slackConfig.slackTeamId}/${ch.slackChannelId}`, expandable: false, permission: ch.permission, diff --git a/connectors/src/lib/models/github.ts b/connectors/src/lib/models/github.ts index 05d9b0e72e3b..23064968666e 100644 --- a/connectors/src/lib/models/github.ts +++ b/connectors/src/lib/models/github.ts @@ -146,7 +146,7 @@ GithubDiscussion.init( indexes: [ { fields: ["repoId", "discussionNumber", "connectorId"], unique: true }, { fields: ["connectorId"] }, - { fields: ["repoId"] }, + { fields: ["repoId", "updatedAt"] }, ], modelName: "github_discussions", } diff --git a/front/components/ConnectorPermissionsTree.tsx b/front/components/ConnectorPermissionsTree.tsx index 88367a04f769..e4efb86e64d2 100644 --- a/front/components/ConnectorPermissionsTree.tsx +++ b/front/components/ConnectorPermissionsTree.tsx @@ -95,7 +95,6 @@ function PermissionTreeChildren({ return ( {resources.map((r, i) => { - const titlePrefix = r.type === "channel" ? "#" : ""; return ( {resources.map((r) => { const IconComponent = getIconForType(r.type); - const titlePrefix = r.type === "channel" ? "#" : ""; const checkStatus = getCheckStatus(r.internalId); return (
@@ -189,7 +188,9 @@ function DataSourceResourceSelectorChildren({
- {`${titlePrefix}${r.title}`} + + {r.title} +