From 04d68c985767762019d05121bba8e50ba29641bc Mon Sep 17 00:00:00 2001 From: Henry Fontanier Date: Wed, 27 Nov 2024 16:38:04 +0100 Subject: [PATCH] feat(kw-search): pass title & mime when upserting tables from connectors (#8962) * feat(kw-search): pass title and mimetype when upserting tables from connectors * improve mimetype for notion DB --------- Co-authored-by: Henry Fontanier --- .../src/connectors/google_drive/temporal/spreadsheets.ts | 2 ++ .../src/connectors/microsoft/temporal/spreadsheets.ts | 3 +++ connectors/src/connectors/notion/temporal/activities.ts | 4 ++++ connectors/src/connectors/shared/file.ts | 2 ++ connectors/src/lib/data_sources.ts | 6 ++++++ 5 files changed, 17 insertions(+) diff --git a/connectors/src/connectors/google_drive/temporal/spreadsheets.ts b/connectors/src/connectors/google_drive/temporal/spreadsheets.ts index ea38e9ae46b2..be7b31f022c4 100644 --- a/connectors/src/connectors/google_drive/temporal/spreadsheets.ts +++ b/connectors/src/connectors/google_drive/temporal/spreadsheets.ts @@ -83,6 +83,8 @@ async function upsertGdriveTable( truncate: true, parents: [tableId, ...parents], useAppForHeaderDetection: true, + title: `${spreadsheet.title} - ${title}`, + mimeType: "application/vnd.google-apps.spreadsheet", }); logger.info(loggerArgs, "[Spreadsheet] Table upserted."); diff --git a/connectors/src/connectors/microsoft/temporal/spreadsheets.ts b/connectors/src/connectors/microsoft/temporal/spreadsheets.ts index eda3bfd1c204..c5f13d384d2b 100644 --- a/connectors/src/connectors/microsoft/temporal/spreadsheets.ts +++ b/connectors/src/connectors/microsoft/temporal/spreadsheets.ts @@ -97,6 +97,9 @@ async function upsertMSTable( truncate: true, parents, useAppForHeaderDetection: true, + title: `${spreadsheet.name} - ${worksheet.name}`, + mimeType: + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", }); logger.info(loggerArgs, "[Spreadsheet] Table upserted."); diff --git a/connectors/src/connectors/notion/temporal/activities.ts b/connectors/src/connectors/notion/temporal/activities.ts index 99fe5fce351c..faeb054090da 100644 --- a/connectors/src/connectors/notion/temporal/activities.ts +++ b/connectors/src/connectors/notion/temporal/activities.ts @@ -1812,6 +1812,8 @@ export async function renderAndUpsertPageFromCache({ // We only update the rowId of for the page without truncating the rest of the table (incremental sync). truncate: false, parents, + title: parentDb.title ?? "Untitled Notion Database", + mimeType: "application/vnd.notion.database", }), localLogger ); @@ -2522,6 +2524,8 @@ export async function upsertDatabaseStructuredDataFromCache({ // We overwrite the whole table since we just fetched all child pages. truncate: true, parents, + title: dbModel.title ?? "Untitled Notion Database", + mimeType: "notion/database", }), localLogger ); diff --git a/connectors/src/connectors/shared/file.ts b/connectors/src/connectors/shared/file.ts index ec2d7604d7da..f6e21d9880cb 100644 --- a/connectors/src/connectors/shared/file.ts +++ b/connectors/src/connectors/shared/file.ts @@ -78,6 +78,8 @@ export async function handleCsvFile({ }, truncate: true, parents, + title: fileName, + mimeType: "text/csv", }); } catch (err) { localLogger.warn({ error: err }, "Error while parsing or upserting table"); diff --git a/connectors/src/lib/data_sources.ts b/connectors/src/lib/data_sources.ts index 6e13e5ff634f..966a679fcea7 100644 --- a/connectors/src/lib/data_sources.ts +++ b/connectors/src/lib/data_sources.ts @@ -709,6 +709,8 @@ export async function upsertTableFromCsv({ truncate, parents, useAppForHeaderDetection, + title, + mimeType, }: { dataSourceConfig: DataSourceConfig; tableId: string; @@ -719,6 +721,8 @@ export async function upsertTableFromCsv({ truncate: boolean; parents: string[]; useAppForHeaderDetection?: boolean; + title: string; + mimeType: string; }) { const localLogger = logger.child({ ...loggerArgs, tableId, tableName }); const statsDTags = [ @@ -754,6 +758,8 @@ export async function upsertTableFromCsv({ truncate, async: true, useAppForHeaderDetection, + title, + mimeType, }; const dustRequestConfig: AxiosRequestConfig = { headers: {