Skip to content

Commit

Permalink
implement missing cases for github
Browse files Browse the repository at this point in the history
  • Loading branch information
aubin-tchoi committed Dec 19, 2024
1 parent ac54577 commit 42bab5a
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 8 deletions.
94 changes: 86 additions & 8 deletions connectors/src/connectors/github/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import type {
Result,
} from "@dust-tt/types";
import { assertNever, Err, Ok } from "@dust-tt/types";
import { Op } from "sequelize";

import type { GithubRepo } from "@connectors/connectors/github/lib/github_api";
import {
Expand All @@ -18,7 +19,11 @@ import {
} from "@connectors/connectors/github/lib/hierarchy";
import {
getCodeRootInternalId,
getDiscussionInternalId,
getDiscussionsInternalId,
getGithubIdsFromDiscussionInternalId,
getGithubIdsFromIssueInternalId,
getIssueInternalId,
getIssuesInternalId,
getRepositoryInternalId,
matchGithubInternalIdType,
Expand Down Expand Up @@ -436,10 +441,12 @@ export class GithubConnectorManager extends BaseConnectorManager<null> {

return new Ok(nodes);
}
// we should never be getting issues, discussions or code files as parent
// we should never be getting issues, discussions, code files, single issues or discussions as parent
case "REPO_ISSUES":
case "REPO_DISCUSSIONS":
case "REPO_CODE_FILE":
case "REPO_DISCUSSION":
case "REPO_ISSUE":
return new Err(new Error("Invalid parent ID."));
default:
assertNever(type);
Expand Down Expand Up @@ -471,6 +478,10 @@ export class GithubConnectorManager extends BaseConnectorManager<null> {
const allIssuesFromRepoIds: number[] = [];
const allDiscussionsFromRepoIds: number[] = [];

// Single issues or discussions
const issueIds: { repoId: string; issueNumber: number }[] = [];
const discussionIds: { repoId: string; discussionNumber: number }[] = [];

// The full code, or a specific folder or file in the code
const allCodeFromRepoIds: string[] = [];
const codeDirectoryIds: string[] = [];
Expand Down Expand Up @@ -500,6 +511,12 @@ export class GithubConnectorManager extends BaseConnectorManager<null> {
case "REPO_CODE_FILE":
codeFileIds.push(internalId);
break;
case "REPO_DISCUSSION":
discussionIds.push(getGithubIdsFromDiscussionInternalId(internalId));
break;
case "REPO_ISSUE":
issueIds.push(getGithubIdsFromIssueInternalId(internalId));
break;
default:
assertNever(type);
}
Expand Down Expand Up @@ -544,6 +561,22 @@ export class GithubConnectorManager extends BaseConnectorManager<null> {
}),
]);

// Issues and Discussions are also stored in the db
const [issues, discussions] = await Promise.all([
GithubIssue.findAll({
where: {
connectorId: c.id,
[Op.or]: issueIds,
},
}),
GithubDiscussion.findAll({
where: {
connectorId: c.id,
[Op.or]: discussionIds,
},
}),
]);

// Constructing Nodes for Full Repo
fullRepoIds.forEach((repoId) => {
const repo = uniqueRepos[repoId];
Expand Down Expand Up @@ -605,6 +638,46 @@ export class GithubConnectorManager extends BaseConnectorManager<null> {
});
});

issues.forEach((issue) => {
const { repoId, issueNumber } = issue;
const repo = uniqueRepos[parseInt(repoId, 10)];
if (!repo) {
return;
}
nodes.push({
provider: c.type,
internalId: getIssueInternalId(repoId, issueNumber),
parentInternalId: getIssuesInternalId(repoId),
type: "file",
title: `Issue #${issueNumber}`,
sourceUrl: repo.url + `/issues/${issueNumber}`,
expandable: false,
permission: "read",
dustDocumentId: getIssueInternalId(repoId, issueNumber),
lastUpdatedAt: issue.updatedAt.getTime(),
});
});

discussions.forEach((discussion) => {
const { repoId, discussionNumber } = discussion;
const repo = uniqueRepos[parseInt(repoId, 10)];
if (!repo) {
return;
}
nodes.push({
provider: c.type,
internalId: getDiscussionInternalId(repoId, discussionNumber),
parentInternalId: getDiscussionsInternalId(repoId),
type: "file",
title: `Discussion #${discussionNumber}`,
sourceUrl: repo.url + `/discussions/${discussionNumber}`,
expandable: false,
permission: "read",
dustDocumentId: getDiscussionInternalId(repoId, discussionNumber),
lastUpdatedAt: discussion.updatedAt.getTime(),
});
});

// Constructing Nodes for Code
fullCodeInRepos.forEach((codeRepo) => {
const repo = uniqueRepos[parseInt(codeRepo.repoId)];
Expand Down Expand Up @@ -679,13 +752,6 @@ export class GithubConnectorManager extends BaseConnectorManager<null> {
);
}

if (/^github-issue-\d+-\d+$/.test(internalId)) {
return new Ok([internalId]); // this is incorrect but matches the previous behavior, will fix in a follow-up PR
}
if (/^github-discussion-\d+-\d+$/.test(internalId)) {
return new Ok([internalId]); // this is incorrect but matches the previous behavior, will fix in a follow-up PR
}

const { type, repoId } = matchGithubInternalIdType(internalId);

switch (type) {
Expand Down Expand Up @@ -715,6 +781,18 @@ export class GithubConnectorManager extends BaseConnectorManager<null> {
);
return new Ok([internalId, ...parents]);
}
case "REPO_ISSUE":
return new Ok([
internalId,
getIssuesInternalId(repoId),
getRepositoryInternalId(repoId),
]);
case "REPO_DISCUSSION":
return new Ok([
internalId,
getDiscussionsInternalId(repoId),
getRepositoryInternalId(repoId),
]);
default: {
assertNever(type);
}
Expand Down
42 changes: 42 additions & 0 deletions connectors/src/connectors/github/lib/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ export const GITHUB_CONTENT_NODE_TYPES = [
"REPO_CODE",
"REPO_CODE_DIR",
"REPO_CODE_FILE",
"REPO_ISSUE",
"REPO_DISCUSSION",
] as const;

export type GithubContentNodeType = (typeof GITHUB_CONTENT_NODE_TYPES)[number];
Expand All @@ -19,6 +21,28 @@ export function isGithubCodeFileId(internalId: string): boolean {
return /^github-code-\d+-file-[a-f0-9]+$/.test(internalId);
}

/**
 * Extracts the repository id and the discussion number from the internal id
 * of a single discussion, i.e. `github-discussion-<repoId>-<discussionNumber>`.
 *
 * @param internalId - Internal id of a single discussion.
 * @returns The repository id as a base-10 string (normalized through
 * `parseInt`, so leading zeros are dropped) and the discussion number.
 * @throws Error if `internalId` does not have the expected format. Without
 * this check a malformed id would silently yield `"NaN"` / `NaN`.
 */
export function getGithubIdsFromDiscussionInternalId(internalId: string): {
  repoId: string;
  discussionNumber: number;
} {
  const match = /^github-discussion-(\d+)-(\d+)$/.exec(internalId);
  const repoIdDigits = match?.[1];
  const discussionDigits = match?.[2];
  if (repoIdDigits === undefined || discussionDigits === undefined) {
    throw new Error(`Invalid Github discussion internal id: ${internalId}`);
  }
  return {
    repoId: parseInt(repoIdDigits, 10).toString(),
    discussionNumber: parseInt(discussionDigits, 10),
  };
}

/**
 * Extracts the repository id and the issue number from the internal id of a
 * single issue, i.e. `github-issue-<repoId>-<issueNumber>`.
 *
 * @param internalId - Internal id of a single issue.
 * @returns The repository id as a base-10 string (normalized through
 * `parseInt`, so leading zeros are dropped) and the issue number.
 * @throws Error if `internalId` does not have the expected format. Without
 * this check a malformed id would silently yield `"NaN"` / `NaN`.
 */
export function getGithubIdsFromIssueInternalId(internalId: string): {
  repoId: string;
  issueNumber: number;
} {
  const match = /^github-issue-(\d+)-(\d+)$/.exec(internalId);
  const repoIdDigits = match?.[1];
  const issueDigits = match?.[2];
  if (repoIdDigits === undefined || issueDigits === undefined) {
    throw new Error(`Invalid Github issue internal id: ${internalId}`);
  }
  return {
    repoId: parseInt(repoIdDigits, 10).toString(),
    issueNumber: parseInt(issueDigits, 10),
  };
}

/**
* Gets the type of the Github content node from its internal id.
*/
Expand Down Expand Up @@ -74,6 +98,24 @@ export function matchGithubInternalIdType(internalId: string): {
),
};
}
if (/^github-issue-\d+-\d+$/.test(internalId)) {
return {
type: "REPO_ISSUE",
repoId: parseInt(
internalId.replace(/^github-issue-(\d+)-\d+$/, "$1"),
10
),
};
}
if (/^github-discussion-\d+-\d+$/.test(internalId)) {
return {
type: "REPO_DISCUSSION",
repoId: parseInt(
internalId.replace(/^github-discussion-(\d+)-\d+$/, "$1"),
10
),
};
}
throw new Error(`Invalid Github internal id: ${internalId}`);
}

Expand Down

0 comments on commit 42bab5a

Please sign in to comment.