Skip to content

Commit

Permalink
[connectors] - fix(microsoft): data sour selection tree (#6321)
Browse files Browse the repository at this point in the history
* [connectors/microsoft] - feature: extend Microsoft connector to include file nodes

 - Add support for listing file nodes alongside folder nodes in Microsoft connector responses
 - Implement permission filtering logic for both files and folders when constructing content nodes
 - Create a `getFileAsContentNode` function for mapping Microsoft DriveItem to ContentNode for files
 - Enhance permission mapping to account for parent permissions when assigning to content nodes

* [connectors/microsoft] - feature: enhance node retrieval with read permission filter

 - Added support to use the "read" permission filter when retrieving child nodes
 - Removed getFileAsContentNode call and integrated getMicrosoftNodeAsContentNode for node retrieval
 - Implemented a new function to get content nodes from MicrosoftNodeModel
 - Refactored node type determination logic into getMicrosoftNodeAsContentNode function
 - Removed conditional file handling related to permission filtering within the content node retrieval process

* [connectors] - feature: expand retrieval of child nodes with content inclusion flag

 - Add an additional flag to the `retrieveChildrenNodes` function to control the inclusion of additional content
 - Expose `retrieveChildrenNodes` function for potential external use by making it exportable

* [connectors] - refactor: remove redundant fetch parameter in Microsoft connector

 - Simplified the node fetching logic by removing an unnecessary boolean parameter

* fix: lint/format

* [connectors] - feature: enhance filtering for children nodes retrieval in Microsoft connector

 - Include a new condition to filter only 'file', 'folder', and 'drive' node types when retrieving children nodes
 - Utilize Sequelize's Op.in to specify the multiple nodeType values in the query condition

* [microsoft/temporal] - fix: adjust spreadsheet syncing logic

 - Remove early return for spreadsheet mimeType to align with new sync logic
 - Pass `parentInternalId` when syncing spreadsheets and worksheets to maintain hierarchy
 - Ensure spreadsheets have the `parentInternalId` when upserted to the database for relational integrity

* [connectors/microsoft] - refactor: streamline children node retrieval in MicrosoftConnector

 - Replace direct usage of `MicrosoftNodeModel` with `MicrosoftNodeResource`
 - Introduce method `fetchNodesWithoutParents` to get nodes without parent references
 - Create `fetchChildren` method in `MicrosoftNodeResource` to encapsulate children node fetching logic
 - Adjust `retrieveChildrenNodes` function to utilize new `MicrosoftNodeResource` methods for improved readability and maintainability

---------

Co-authored-by: Jules <[email protected]>
Co-authored-by: Jules <[email protected]>
  • Loading branch information
3 people authored Jul 19, 2024
1 parent 900892c commit 60f48a5
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 31 deletions.
61 changes: 50 additions & 11 deletions connectors/src/connectors/microsoft/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import {
getChannelAsContentNode,
getDriveAsContentNode,
getFolderAsContentNode,
getMicrosoftNodeAsContentNode,
getSiteAsContentNode,
getTeamAsContentNode,
} from "@connectors/connectors/microsoft/lib/content_nodes";
Expand All @@ -40,6 +41,7 @@ import logger from "@connectors/logger/logger";
import { ConnectorResource } from "@connectors/resources/connector_resource";
import {
MicrosoftConfigurationResource,
MicrosoftNodeResource,
MicrosoftRootResource,
} from "@connectors/resources/microsoft_resource";
import type { DataSourceConfig } from "@connectors/types/data_source_config";
Expand Down Expand Up @@ -168,6 +170,24 @@ export class MicrosoftConnectorManager extends BaseConnectorManager<null> {
);
}

if (filterPermission === "read") {
if (!parentInternalId) {
const nodes = await MicrosoftNodeResource.fetchNodesWithoutParents();
return new Ok(
nodes.map((node) => getMicrosoftNodeAsContentNode(node, false))
);
}
const node = await MicrosoftNodeResource.fetchByInternalId(
this.connectorId,
parentInternalId
);
if (!node) {
return new Err(
new Error(`Could not find node with id ${parentInternalId}`)
);
}
return retrieveChildrenNodes(node, false);
}
const client = await getClient(connector.connectionId);
const nodes = [];

Expand Down Expand Up @@ -227,11 +247,10 @@ export class MicrosoftConnectorManager extends BaseConnectorManager<null> {
}
case "drive":
case "folder": {
const folders = (
await getAllPaginatedEntities((nextLink) =>
getFilesAndFolders(client, parentInternalId, nextLink)
)
).filter((n) => n.folder);
const filesAndFolders = await getAllPaginatedEntities((nextLink) =>
getFilesAndFolders(client, parentInternalId, nextLink)
);
const folders = filesAndFolders.filter((n) => n.folder);
nodes.push(
...folders.map((n) => getFolderAsContentNode(n, parentInternalId))
);
Expand All @@ -250,12 +269,16 @@ export class MicrosoftConnectorManager extends BaseConnectorManager<null> {
}
}

const nodesWithPermissions = nodes.map((res) => ({
...res,
permission: (selectedResources.includes(res.internalId)
? "read"
: "none") as ConnectorPermission,
}));
const nodesWithPermissions = nodes.map((res) => {
return {
...res,
permission: (selectedResources.includes(res.internalId) ||
(res.parentInternalId &&
selectedResources.includes(res.parentInternalId))
? "read"
: "none") as ConnectorPermission,
};
});

if (filterPermission) {
return new Ok(
Expand Down Expand Up @@ -495,3 +518,19 @@ export async function getClient(connectionId: NangoConnectionId) {
authProvider: (done) => done(null, msAccessToken),
});
}

export async function retrieveChildrenNodes(
microsoftNode: MicrosoftNodeResource,
expandWorksheet: boolean
): Promise<Result<ContentNode[], Error>> {
const childrenNodes = await microsoftNode.fetchChildren([
"file",
"folder",
"drive",
]);
return new Ok(
childrenNodes.map((node) =>
getMicrosoftNodeAsContentNode(node, expandWorksheet)
)
);
}
56 changes: 55 additions & 1 deletion connectors/src/connectors/microsoft/lib/content_nodes.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { ContentNode } from "@dust-tt/types";
import type { ContentNode, ContentNodeType } from "@dust-tt/types";

import {
getDriveInternalId,
Expand All @@ -7,6 +7,7 @@ import {
internalIdFromTypeAndPath,
typeAndPathFromInternalId,
} from "@connectors/connectors/microsoft/lib/graph_api";
import type { MicrosoftNodeResource } from "@connectors/resources/microsoft_resource";

export function getRootNodes(): ContentNode[] {
return [getSitesRootAsContentNode(), getTeamsRootAsContentNode()];
Expand Down Expand Up @@ -155,3 +156,56 @@ export function getFolderAsContentNode(
permission: "none",
};
}

export function getFileAsContentNode(
file: microsoftgraph.DriveItem,
parentInternalId: string
): ContentNode {
return {
provider: "microsoft",
internalId: getDriveItemInternalId(file),
parentInternalId,
type: "file",
title: file.name || "unnamed",
sourceUrl: "",
dustDocumentId: null,
lastUpdatedAt: null,
expandable: false,
permission: "none",
};
}

export function getMicrosoftNodeAsContentNode(
node: MicrosoftNodeResource,
expandWorksheet: boolean
): ContentNode {
// When table picking we want spreadsheets to expand to select the different
// sheets. While extracting data we want to treat them as regular files.
const isExpandable =
!node.mimeType ||
(node.mimeType ===
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" &&
expandWorksheet);
let type: ContentNodeType;
if (["drive", "folder"].includes(node.nodeType)) {
type = "folder";
} else if (node.nodeType === "worksheet") {
type = expandWorksheet ? "database" : "file";
} else if (node.nodeType === "file") {
type = node.nodeType;
} else {
throw new Error(`Unsupported nodeType ${node.nodeType}.`);
}
return {
provider: "microsoft",
internalId: node.internalId,
parentInternalId: node.parentInternalId,
type,
title: node.name || "unnamed",
sourceUrl: "",
dustDocumentId: null,
lastUpdatedAt: null,
expandable: isExpandable,
permission: "none",
};
}
8 changes: 1 addition & 7 deletions connectors/src/connectors/microsoft/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -472,13 +472,6 @@ export async function syncOneFile({
return false;
}

if (
mimeType ===
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
) {
return (await syncSpreadSheet({ connectorId, file })).isSupported;
}

const maxDocumentLen = providerConfig.largeFilesEnabled
? MAX_LARGE_DOCUMENT_TXT_LEN
: MAX_DOCUMENT_TXT_LEN;
Expand Down Expand Up @@ -552,6 +545,7 @@ export async function syncOneFile({
const res = await syncSpreadSheet({
connectorId,
file,
parentInternalId,
});
if (!res.isSupported) {
return false;
Expand Down
25 changes: 19 additions & 6 deletions connectors/src/connectors/microsoft/temporal/spreadsheets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ const MAXIMUM_NUMBER_OF_EXCEL_SHEET_ROWS = 50000;
async function upsertSpreadsheetInDb(
connector: ConnectorResource,
internalId: string,
file: microsoftgraph.DriveItem
file: microsoftgraph.DriveItem,
parentInternalId: string
) {
return MicrosoftNodeResource.upsert({
internalId,
Expand All @@ -34,13 +35,15 @@ async function upsertSpreadsheetInDb(
mimeType:
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
lastUpsertedTs: new Date(),
parentInternalId,
});
}

async function upsertWorksheetInDb(
connector: ConnectorResource,
internalId: string,
worksheet: microsoftgraph.WorkbookWorksheet
worksheet: microsoftgraph.WorkbookWorksheet,
parentInternalId: string
) {
return MicrosoftNodeResource.upsert({
internalId,
Expand All @@ -50,6 +53,7 @@ async function upsertWorksheetInDb(
name: worksheet.name ?? "",
mimeType: "text/csv",
lastUpsertedTs: new Date(),
parentInternalId,
});
}

Expand Down Expand Up @@ -95,7 +99,8 @@ async function processSheet(
connector: ConnectorResource,
spreadsheet: microsoftgraph.DriveItem,
internalId: string,
worksheet: microsoftgraph.WorkbookWorksheet
worksheet: microsoftgraph.WorkbookWorksheet,
spreadsheetId: string
): Promise<boolean> {
if (!worksheet.id) {
return false;
Expand Down Expand Up @@ -144,7 +149,7 @@ async function processSheet(
loggerArgs
);

await upsertWorksheetInDb(connector, internalId, worksheet);
await upsertWorksheetInDb(connector, internalId, worksheet, spreadsheetId);

return true;
}
Expand All @@ -160,9 +165,11 @@ async function processSheet(
export async function syncSpreadSheet({
connectorId,
file,
parentInternalId,
}: {
connectorId: number;
file: microsoftgraph.DriveItem;
parentInternalId: string;
}): Promise<
| {
isSupported: false;
Expand Down Expand Up @@ -199,7 +206,12 @@ export async function syncSpreadSheet({
getWorksheets(client, documentId, nextLink)
);

const spreadsheet = await upsertSpreadsheetInDb(connector, documentId, file);
const spreadsheet = await upsertSpreadsheetInDb(
connector,
documentId,
file,
parentInternalId
);

// List synced sheets.
const syncedWorksheets = await spreadsheet.fetchChildren();
Expand All @@ -213,7 +225,8 @@ export async function syncSpreadSheet({
connector,
file,
internalWorkSheetId,
worksheet
worksheet,
documentId
);
if (isImported) {
successfulSheetIdImports.push(internalWorkSheetId);
Expand Down
36 changes: 30 additions & 6 deletions connectors/src/resources/microsoft_resource.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import type { ModelId, Result } from "@dust-tt/types";
import { Ok } from "@dust-tt/types";
import type { Attributes, ModelStatic, Transaction } from "sequelize";
import { Op } from "sequelize";

import type { MicrosoftNode } from "@connectors/connectors/microsoft/lib/types";
import type {
MicrosoftNode,
MicrosoftNodeType,
} from "@connectors/connectors/microsoft/lib/types";
import { concurrentExecutor } from "@connectors/lib/async_utils";
import {
MicrosoftConfigurationModel,
Expand Down Expand Up @@ -280,6 +284,15 @@ export class MicrosoftNodeResource extends BaseResource<MicrosoftNodeModel> {
return resource;
}

static async fetchNodesWithoutParents() {
const blobs = await this.model.findAll({
where: {
parentInternalId: null,
},
});
return blobs.map((blob) => new this(this.model, blob.get()));
}

static async fetchByInternalIds(connectorId: ModelId, internalIds: string[]) {
const blobs = await this.model.findAll({
where: {
Expand All @@ -291,13 +304,24 @@ export class MicrosoftNodeResource extends BaseResource<MicrosoftNodeModel> {
return blobs.map((blob) => new this(this.model, blob.get()));
}

async fetchChildren() {
async fetchChildren(nodeTypes?: MicrosoftNodeType[]) {
const whereClause: {
connectorId: number;
parentInternalId: string;
nodeType?: { [Op.in]: MicrosoftNodeType | MicrosoftNodeType[] };
} = {
connectorId: this.connectorId,
parentInternalId: this.internalId,
};

if (nodeTypes) {
whereClause.nodeType = { [Op.in]: nodeTypes };
}

const blobs = await this.model.findAll({
where: {
connectorId: this.connectorId,
parentInternalId: this.internalId,
},
where: whereClause,
});

if (!blobs) {
return [];
}
Expand Down

0 comments on commit 60f48a5

Please sign in to comment.