Skip to content

Commit

Permalink
Skip Google spreadsheet when size exceeds 128 MB (#4202)
Browse files Browse the repository at this point in the history
* Skip Google spreadsheet when size exceeds 200 MB

* Add size to attributes to fetch

* 👕

* 👕

* Lower to 128MB.

* 📖
  • Loading branch information
flvndvd authored Mar 7, 2024
1 parent 71db9ae commit 851f4ef
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 15 deletions.
17 changes: 17 additions & 0 deletions connectors/src/connectors/google_drive/temporal/spreadsheets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import { ConnectorResource } from "@connectors/resources/connector_resource";
import type { GoogleDriveObjectType } from "@connectors/types/google_drive";

const MAXIMUM_NUMBER_OF_GSHEET_ROWS = 10000;
// 128 MB in bytes. Spreadsheets whose reported size exceeds this are skipped
// by syncSpreadSheet instead of being imported.
const MAX_FILE_SIZE = 128 * 1024 * 1024;

type Sheet = sheets_v4.Schema$ValueRange & {
id: number;
Expand Down Expand Up @@ -386,6 +387,22 @@ export async function syncSpreadSheet(
"[Spreadsheet] Syncing Google Spreadsheet."
);

// Avoid import attempts for sheets exceeding the max size due to Node constraints.
if (file.size && file.size > MAX_FILE_SIZE) {
logger.info(
{
...loggerArgs,
spreadsheet: {
id: file.id,
},
size: file.size,
},
"[Spreadsheet] Spreadsheet size exceeded, skipping further processing."
);

return false;
}

const sheetsAPI = google.sheets({ version: "v4", auth: oauth2client });

const getSpreadsheet = async () => {
Expand Down
2 changes: 2 additions & 0 deletions connectors/src/connectors/google_drive/temporal/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ export async function driveObjectToDustType(
webViewLink: file.webViewLink ? file.webViewLink : undefined,
createdAtMs: new Date(file.createdTime).getTime(),
trashed: false,
size: null,
capabilities: {
canDownload: false,
},
Expand All @@ -57,6 +58,7 @@ export async function driveObjectToDustType(
webViewLink: file.webViewLink ? file.webViewLink : undefined,
createdAtMs: new Date(file.createdTime).getTime(),
trashed: file.trashed ? file.trashed : false,
size: file.size ? parseInt(file.size, 10) : null,
updatedAtMs: file.modifiedTime
? new Date(file.modifiedTime).getTime()
: undefined,
Expand Down
32 changes: 17 additions & 15 deletions connectors/src/types/google_drive.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
/**
 * Normalized representation of a Google Drive object (file or drive) as
 * produced by `driveObjectToDustType`.
 *
 * Each member is declared exactly once, in alphabetical order; duplicate
 * member declarations in a type literal are a compile error.
 */
export type GoogleDriveObjectType = {
  capabilities: {
    canDownload: boolean;
  };
  /** Creation time as epoch milliseconds (`Date#getTime()`). */
  createdAtMs: number;
  id: string;
  lastEditor?: {
    displayName: string;
  };
  mimeType: string;
  name: string;
  parent: string | null;
  /**
   * Size reported by Drive, parsed to a number; `null` when Drive does not
   * report one. Compared against a byte limit by the spreadsheet sync.
   */
  size: number | null;
  trashed: boolean;
  /** Last modification time as epoch milliseconds, when known. */
  updatedAtMs?: number;
  webViewLink?: string;
};
export type GoogleDriveFolderType = {
id: string;
Expand All @@ -26,15 +27,16 @@ export type GoogleDriveSelectedFolderType = GoogleDriveFolderType & {
};

/**
 * Google Drive file attributes requested when fetching file metadata.
 *
 * Kept alphabetically sorted with one entry per attribute so additions are
 * easy to review. NOTE(review): presumably joined into the Drive API `fields`
 * selector — confirm at call sites.
 */
export const FILE_ATTRIBUTES_TO_FETCH = [
  "capabilities",
  "createdTime",
  "driveId",
  "id",
  "lastModifyingUser",
  "mimeType",
  "modifiedTime",
  "name",
  "parents",
  "size",
  "trashed",
  "webViewLink",
] as const;

0 comments on commit 851f4ef

Please sign in to comment.