Skip to content

Commit

Permalink
GitHub code support: Move issues/discussions to separate parents in g…
Browse files Browse the repository at this point in the history
…ithub connectors (#3032)

* Add issues/discussions parents at aggregation and permission retrieval

* make github nested, remove default prefix for channels

* re-add # to channel title for slack connector permissions

* dry

* add migration to add parents for github issues and discussions

* lint
  • Loading branch information
spolu authored Jan 2, 2024
1 parent 5bf218e commit 54080ea
Show file tree
Hide file tree
Showing 9 changed files with 227 additions and 46 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import {
getDiscussionDocumentId,
getIssueDocumentId,
} from "@connectors/connectors/github/temporal/activities";
import { updateDocumentParentsField } from "@connectors/lib/data_sources";
import { Connector } from "@connectors/lib/models";
import { GithubDiscussion, GithubIssue } from "@connectors/lib/models/github";

// Dry-run switch read from the environment: parents are only updated when LIVE is set to a
// non-empty value; otherwise the script just logs what it would have done.
const { LIVE = null } = process.env;

/**
 * Migration entry point: loads every connector of type "github" and updates
 * the parents of its documents, one connector after the other.
 */
async function main() {
  const githubConnectors = await Connector.findAll({
    where: { type: "github" },
  });

  // Connectors are processed sequentially: each one is awaited before the next starts.
  for (const githubConnector of githubConnectors) {
    const { id } = githubConnector;
    console.log(`>> Updating connector: ${id}`);
    await updateParents(githubConnector);
  }
}

// Maximum number of documents whose parents are updated concurrently in one batch.
const CHUNK_SIZE = 32;

/**
 * Splits `items` into consecutive slices of at most `size` elements; the last
 * slice may be shorter. An empty input yields an empty array.
 */
function chunkItems<T>(items: T[], size: number): T[][] {
  const chunks: T[][] = [];
  for (let start = 0; start < items.length; start += size) {
    chunks.push(items.slice(start, start + size));
  }
  return chunks;
}

/**
 * Writes `parents` on the document identified by `documentId` when LIVE is
 * set; otherwise only logs the update that would have been performed.
 *
 * @param connector Connector passed as the data source config of the update.
 * @param kind Label used in the "Updated ..." log line.
 * @param documentId Id of the document to update.
 * @param parents New parents array for the document.
 */
async function applyParents(
  connector: Connector,
  kind: "discussion" | "issue",
  documentId: string,
  parents: string[]
): Promise<void> {
  if (LIVE) {
    await updateDocumentParentsField({
      dataSourceConfig: connector,
      documentId,
      parents,
    });
    console.log(`Updated ${kind} ${documentId} with: ${parents}`);
  } else {
    console.log(`Would update ${documentId} with: ${parents}`);
  }
}

/**
 * Rewrites the parents of every discussion and issue document of `connector`.
 *
 * New parents are `[documentId, "<repoId>-discussions" | "<repoId>-issues", repoId]`.
 * Discussions are processed first, then issues. Within each kind, documents
 * are handled in batches of CHUNK_SIZE, each batch running concurrently and
 * being awaited before the next one starts.
 *
 * @param connector The github connector whose documents are updated.
 */
async function updateParents(connector: Connector) {
  const discussions = await GithubDiscussion.findAll({
    where: {
      connectorId: connector.id,
    },
  });

  for (const chunk of chunkItems(discussions, CHUNK_SIZE)) {
    await Promise.all(
      chunk.map(async (d) => {
        const documentId = getDiscussionDocumentId(
          d.repoId,
          d.discussionNumber
        );
        const parents = [documentId, `${d.repoId}-discussions`, d.repoId];
        await applyParents(connector, "discussion", documentId, parents);
      })
    );
  }

  const issues = await GithubIssue.findAll({
    where: {
      connectorId: connector.id,
    },
  });

  for (const chunk of chunkItems(issues, CHUNK_SIZE)) {
    await Promise.all(
      chunk.map(async (i) => {
        const documentId = getIssueDocumentId(i.repoId, i.issueNumber);
        const parents = [documentId, `${i.repoId}-issues`, i.repoId];
        await applyParents(connector, "issue", documentId, parents);
      })
    );
  }
}

// Script entry: run the migration, then exit with status 0 on success, or log
// the error and exit with status 1 on failure.
void (async () => {
  try {
    await main();
    console.log("Done");
    process.exit(0);
  } catch (err) {
    console.error(err);
    process.exit(1);
  }
})();
123 changes: 93 additions & 30 deletions connectors/src/connectors/github/index.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import { ModelId } from "@dust-tt/types";

import {
getRepo,
getReposPage,
validateInstallationId,
} from "@connectors/connectors/github/lib/github_api";
import { launchGithubFullSyncWorkflow } from "@connectors/connectors/github/temporal/client";
import { Connector, sequelize_conn } from "@connectors/lib/models";
import {
GithubConnectorState,
GithubDiscussion,
GithubIssue,
} from "@connectors/lib/models/github";
import { Err, Ok, Result } from "@connectors/lib/result";
Expand Down Expand Up @@ -252,14 +254,6 @@ export async function retrieveGithubConnectorPermissions({
}: Parameters<ConnectorPermissionRetriever>[0]): Promise<
Result<ConnectorResource[], Error>
> {
if (parentInternalId) {
return new Err(
new Error(
"Github connector does not support permission retrieval with `parentInternalId`"
)
);
}

const c = await Connector.findOne({
where: {
id: connectorId,
Expand All @@ -272,36 +266,105 @@ export async function retrieveGithubConnectorPermissions({

const githubInstallationId = c.connectionId;

let resources: ConnectorResource[] = [];
let pageNumber = 1; // 1-indexed
for (;;) {
const page = await getReposPage(githubInstallationId, pageNumber);
pageNumber += 1;
if (page.length === 0) {
break;
if (!parentInternalId) {
// No parentInternalId: we return the repositories.

let resources: ConnectorResource[] = [];
let pageNumber = 1; // 1-indexed
for (;;) {
const page = await getReposPage(githubInstallationId, pageNumber);
pageNumber += 1;
if (page.length === 0) {
break;
}

resources = resources.concat(
page.map((repo) => ({
provider: c.type,
internalId: repo.id.toString(),
parentInternalId: null,
type: "folder",
title: repo.name,
sourceUrl: repo.url,
expandable: true,
permission: "read" as ConnectorPermission,
dustDocumentId: null,
lastUpdatedAt: null,
}))
);
}

resources.sort((a, b) => {
return a.title.localeCompare(b.title);
});

return new Ok(resources);
} else {
// If parentInternalId is set this means we are fetching the children of a repository. For now
// we only support issues and discussions.
const repoId = parseInt(parentInternalId, 10);
if (isNaN(repoId)) {
return new Err(new Error(`Invalid repoId: ${parentInternalId}`));
}

resources = resources.concat(
page.map((repo) => ({
const [latestDiscussion, latestIssue, repo] = await Promise.all([
(async () => {
return await GithubDiscussion.findOne({
where: {
connectorId: c.id,
repoId: repoId.toString(),
},
limit: 1,
order: [["updatedAt", "DESC"]],
});
})(),
(async () => {
return await GithubIssue.findOne({
where: {
connectorId: c.id,
repoId: repoId.toString(),
},
limit: 1,
order: [["updatedAt", "DESC"]],
});
})(),
getRepo(githubInstallationId, repoId),
]);

const resources: ConnectorResource[] = [];

if (latestIssue) {
resources.push({
provider: c.type,
internalId: repo.id.toString(),
parentInternalId: null,
type: "folder",
title: repo.name,
sourceUrl: repo.url,
internalId: `${repoId}-issues`,
parentInternalId,
type: "database",
title: "Issues",
sourceUrl: repo.url + "/issues",
expandable: false,
permission: "read" as ConnectorPermission,
dustDocumentId: null,
lastUpdatedAt: null,
}))
);
}
lastUpdatedAt: latestIssue.updatedAt.getTime(),
});
}

resources.sort((a, b) => {
return a.title.localeCompare(b.title);
});
if (latestDiscussion) {
resources.push({
provider: c.type,
internalId: `${repoId}-discussions`,
parentInternalId,
type: "channel",
title: "Discussions",
sourceUrl: repo.url + "/discussions",
expandable: false,
permission: "read" as ConnectorPermission,
dustDocumentId: null,
lastUpdatedAt: latestDiscussion.updatedAt.getTime(),
});
}

return new Ok(resources);
return new Ok(resources);
}
}

export async function retrieveGithubReposTitles(
Expand Down
27 changes: 26 additions & 1 deletion connectors/src/connectors/github/lib/github_api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,31 @@ export async function getReposPage(
}));
}

/**
 * Fetches a single repository by its numeric GitHub id, using an Octokit
 * client obtained for the given installation, and maps the response onto a
 * `GithubRepo`.
 *
 * @param installationId Installation id used to obtain the Octokit client.
 * @param repoId Numeric id of the repository to fetch.
 * @returns The repository; `createdAt` / `updatedAt` are `null` when the
 *   response carries no corresponding timestamp.
 */
export async function getRepo(
  installationId: string,
  repoId: number
): Promise<GithubRepo> {
  const client = await getOctokit(installationId);

  const response = await client.request(`GET /repositories/:repo_id`, {
    repo_id: repoId,
  });
  const payload = response.data;

  // Timestamps may be absent from the payload; map missing ones to null.
  const createdAt = payload.created_at ? new Date(payload.created_at) : null;
  const updatedAt = payload.updated_at ? new Date(payload.updated_at) : null;

  return {
    id: payload.id,
    name: payload.name,
    private: payload.private,
    url: payload.html_url,
    createdAt,
    updatedAt,
    description: payload.description,
    owner: {
      id: payload.owner.id,
      login: payload.owner.login,
    },
  };
}

export async function getRepoIssuesPage(
installationId: string,
repoName: string,
Expand Down Expand Up @@ -326,7 +351,7 @@ export async function getDiscussionCommentsPage(
}
}
}
}
}
`,
{
owner: login,
Expand Down
10 changes: 2 additions & 8 deletions connectors/src/connectors/github/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -219,10 +219,7 @@ export async function githubUpsertIssueActivity(
// Therefore as a special case we use getIssueDocumentId() to get a parent string
// The repo id from github is globally unique so used as-is, as per
// convention to use the external id string.
parents: [
getIssueDocumentId(repoId.toString(), issue.number),
repoId.toString(),
],
parents: [documentId, `${repoId}-issues`, repoId.toString()],
retries: 3,
delayBetweenRetriesMs: 500,
loggerArgs: { ...loggerArgs, provider: "github" },
Expand Down Expand Up @@ -397,10 +394,7 @@ export async function githubUpsertDiscussionActivity(
// as a special case we use getDiscussionDocumentId() to get a parent string
// The repo id from github is globally unique so used as-is, as per
// convention to use the external id string.
parents: [
getDiscussionDocumentId(repoId.toString(), discussionNumber),
repoId.toString(),
],
parents: [documentId, `${repoId}-discussions`, repoId.toString()],
retries: 3,
delayBetweenRetriesMs: 500,
loggerArgs: { ...loggerArgs, provider: "github" },
Expand Down
2 changes: 1 addition & 1 deletion connectors/src/connectors/slack/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ export async function retrieveSlackConnectorPermissions({
internalId: ch.slackChannelId,
parentInternalId: null,
type: "channel",
title: ch.slackChannelName,
title: `#${ch.slackChannelName}`,
sourceUrl: `https://app.slack.com/client/${slackConfig.slackTeamId}/${ch.slackChannelId}`,
expandable: false,
permission: ch.permission,
Expand Down
2 changes: 1 addition & 1 deletion connectors/src/lib/models/github.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ GithubDiscussion.init(
indexes: [
{ fields: ["repoId", "discussionNumber", "connectorId"], unique: true },
{ fields: ["connectorId"] },
{ fields: ["repoId"] },
{ fields: ["repoId", "updatedAt"] },
],
modelName: "github_discussions",
}
Expand Down
3 changes: 1 addition & 2 deletions front/components/ConnectorPermissionsTree.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ function PermissionTreeChildren({
return (
<Tree isLoading={isResourcesLoading}>
{resources.map((r, i) => {
const titlePrefix = r.type === "channel" ? "#" : "";
return (
<Tree.Item
key={r.internalId}
Expand All @@ -107,7 +106,7 @@ function PermissionTreeChildren({
}));
}}
type={r.expandable ? "node" : "leaf"}
label={`${titlePrefix}${r.title}`}
label={r.title}
variant={r.type}
className="whitespace-nowrap"
checkbox={
Expand Down
5 changes: 3 additions & 2 deletions front/components/DataSourceResourceSelectorTree.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ function DataSourceResourceSelectorChildren({
<div className="flex-1 space-y-1">
{resources.map((r) => {
const IconComponent = getIconForType(r.type);
const titlePrefix = r.type === "channel" ? "#" : "";
const checkStatus = getCheckStatus(r.internalId);
return (
<div key={r.internalId}>
Expand Down Expand Up @@ -189,7 +188,9 @@ function DataSourceResourceSelectorChildren({
<div>
<IconComponent className="h-5 w-5 text-slate-300" />
</div>
<span className="ml-2 line-clamp-1 text-sm font-medium text-element-900">{`${titlePrefix}${r.title}`}</span>
<span className="ml-2 line-clamp-1 text-sm font-medium text-element-900">
{r.title}
</span>
<div className="ml-32 flex-grow">
<Checkbox
variant="checkable"
Expand Down
2 changes: 1 addition & 1 deletion front/lib/connector_providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ export const CONNECTOR_CONFIGURATIONS: Record<
limitations:
"Dust only gathers data from issues, discussions and top-level pull requests (but not in-code comments in pull requests, nor the actual source code or other Github data).",
logoComponent: GithubLogo,
isNested: false,
isNested: true,
},
intercom: {
name: "Intercom",
Expand Down

0 comments on commit 54080ea

Please sign in to comment.