Skip to content

Commit

Permalink
Add: ability to search in attachments
Browse files Browse the repository at this point in the history
  • Loading branch information
Fraggle committed Nov 21, 2024
1 parent 17347f8 commit 543a7a4
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 6 deletions.
17 changes: 13 additions & 4 deletions front/lib/api/assistant/actions/conversation/list_files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ import { BaseAction } from "@dust-tt/types";
import _ from "lodash";

import { isConversationIncludableFileContentType } from "@app/lib/api/assistant/actions/conversation/include_file";
import { listFiles } from "@app/lib/api/assistant/jit_actions";
import {
isConversationQueryableFileContentType,
isConversationSearchableFileContentType,
listFiles,
} from "@app/lib/api/assistant/jit_actions";

interface ConversationListFilesActionBlob {
agentMessageId: ModelId;
Expand Down Expand Up @@ -53,9 +57,14 @@ export class ConversationListFilesAction extends BaseAction {
// TODO(spolu): add mention of viz if enabled and other tools.
`\n`;
for (const f of this.files) {
content +=
`<file id="${f.fileId}" name="${_.escape(f.title)}" type="${f.contentType}" ` +
`includable="${isConversationIncludableFileContentType(f.contentType)}" queryable="${!!f.snippet}"`;
const readyForJIT = !!f.snippet;
const includable = isConversationIncludableFileContentType(f.contentType);
const queryable =
readyForJIT && isConversationQueryableFileContentType(f.contentType);
const searchable =
readyForJIT && isConversationSearchableFileContentType(f.contentType);

content += `<file id="${f.fileId}" name="${_.escape(f.title)}" type="${f.contentType}" includable="${includable}" queryable="${queryable}" searchable="${searchable}"`;

if (f.snippet) {
content += ` snippet="${_.escape(f.snippet)}"`;
Expand Down
89 changes: 87 additions & 2 deletions front/lib/api/assistant/jit_actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import type {
ModelConversationTypeMultiActions,
ModelMessageTypeMultiActions,
Result,
RetrievalConfigurationType,
SupportedContentFragmentType,
TablesQueryConfigurationType,
} from "@dust-tt/types";
import {
Expand All @@ -20,6 +22,7 @@ import {
isContentFragmentMessageTypeModel,
isContentFragmentType,
isDevelopment,
isSupportedImageContentType,
isSupportedPlainTextContentType,
isTablesQueryActionType,
isTextContent,
Expand Down Expand Up @@ -55,6 +58,58 @@ export async function isJITActionsEnabled(
return use;
}

/**
 * Tells whether a conversation file of the given content type is flagged as
 * "queryable". In this file the predicate is used to select the files handed
 * to the table query action.
 *
 * @param contentType - Content type of the conversation file.
 * @returns `true` only for the CSV content types; `false` for images and for
 *   every other supported content type.
 */
export function isConversationQueryableFileContentType(
  contentType: SupportedContentFragmentType
): boolean {
  // Images are never queryable; handle them before the per-type switch.
  if (isSupportedImageContentType(contentType)) {
    return false;
  }

  switch (contentType) {
    // Comma-separated files are the only queryable ones.
    case "text/csv":
    case "text/comma-separated-values":
      return true;

    // Every other supported type is explicitly not queryable.
    case "text/tsv":
    case "text/tab-separated-values":
    case "dust-application/slack":
    case "text/plain":
    case "text/markdown":
    case "application/pdf":
    case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
    case "application/msword":
      return false;

    // Exhaustiveness guard: an unhandled content type ends up here.
    default:
      return assertNever(contentType);
  }
}

/**
 * Tells whether a conversation file of the given content type is flagged as
 * "searchable". In this file the predicate is used to select the files handed
 * to the retrieval action.
 *
 * @param contentType - Content type of the conversation file.
 * @returns `true` for the document, text, Slack and tab-separated content
 *   types; `false` for images and for the CSV content types.
 */
export function isConversationSearchableFileContentType(
  contentType: SupportedContentFragmentType
): boolean {
  // Images are never searchable; handle them before the per-type switch.
  if (isSupportedImageContentType(contentType)) {
    return false;
  }

  switch (contentType) {
    // Document-like, plain-text, Slack and tab-separated types are searchable.
    case "text/tsv":
    case "text/tab-separated-values":
    case "dust-application/slack":
    case "text/plain":
    case "text/markdown":
    case "application/pdf":
    case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
    case "application/msword":
      return true;

    // Comma-separated files are explicitly not searchable.
    case "text/csv":
    case "text/comma-separated-values":
      return false;

    // Exhaustiveness guard: an unhandled content type ends up here.
    default:
      return assertNever(contentType);
  }
}

export function listFiles(
conversation: ConversationType
): ConversationFileType[] {
Expand Down Expand Up @@ -124,8 +179,8 @@ export async function getJITActions(
}

// Check tables for the table query action.
const filesUsableAsTableQuery = filesUsableForJIT.filter(
(f) => f.contentType === "text/csv" // TODO: there should not be a hardcoded value here
const filesUsableAsTableQuery = filesUsableForJIT.filter((f) =>
isConversationQueryableFileContentType(f.contentType)
);

if (filesUsableAsTableQuery.length > 0) {
Expand All @@ -146,6 +201,36 @@ export async function getJITActions(
};
actions.push(action);
}

// Check files for the retrieval query action.
const filesUsableAsRetrievalQuery = filesUsableForJIT.filter((f) =>
isConversationSearchableFileContentType(f.contentType)
);

if (filesUsableAsRetrievalQuery.length > 0) {
const action: RetrievalConfigurationType = {
description: filesUsableAsRetrievalQuery
.map(
(f) => `Title: ${f.title}\nFileId: ${f.fileId}\n${f.snippet}`
)
.join("\n\n"),
type: "retrieval_configuration",
id: -1,
name: "query_conversation_retrieval",
sId: generateRandomModelSId(),
topK: "auto",
query: "auto",
relativeTimeFrame: "auto",
dataSources: [
{
workspaceId: conversation.owner.sId,
dataSourceViewId: dataSourceView.sId,
filter: { parents: null },
},
],
};
actions.push(action);
}
}
}
}
Expand Down

0 comments on commit 543a7a4

Please sign in to comment.