diff --git a/connectors/src/connectors/google_drive/temporal/activities.ts b/connectors/src/connectors/google_drive/temporal/activities.ts
index a4e77a87be84..541c6984f96a 100644
--- a/connectors/src/connectors/google_drive/temporal/activities.ts
+++ b/connectors/src/connectors/google_drive/temporal/activities.ts
@@ -509,6 +509,21 @@ async function syncOneFile(
     return false;
   }
 
+  // Empty or whitespace-only content: log it and bail out with the same
+  // `return false` as the guard above, instead of falling through to rendering.
+  if (!documentContent || documentContent.trim().length === 0) {
+    logger.info(
+      {
+        connectorId: connectorId,
+        documentId,
+        fileMimeType: file.mimeType,
+        fileId: file.id,
+        title: file.name,
+      },
+      "Skipping empty document"
+    );
+    return false;
+  }
+
   const content = renderDocumentTitleAndContent({
     title: file.name,
     updatedAt: file.updatedAtMs ? new Date(file.updatedAtMs) : undefined,
diff --git a/front/lib/connector_providers.ts b/front/lib/connector_providers.ts
index f974243ad06d..b7f5535c2ce2 100644
--- a/front/lib/connector_providers.ts
+++ b/front/lib/connector_providers.ts
@@ -58,7 +58,7 @@ export const CONNECTOR_CONFIGURATIONS: Record<
     description:
       "Authorize granular access to your company's Google Drive, by drives and folders. Supported files include GDocs, GSlides, and .txt files. Email us for .pdf indexation.",
     limitations:
-      "Files with more than 750KB of extracted text are ignored. By default, PDF files are not indexed. Email us at team@dust.tt to enable PDF indexing.",
+      "Files with empty text content or with more than 750KB of extracted text are ignored. By default, PDF files are not indexed. Email us at team@dust.tt to enable PDF indexing.",
     logoComponent: DriveLogo,
     isNested: true,
   },