Skip to content

Commit

Permalink
double allowed prefix size
Browse files Browse the repository at this point in the history
  • Loading branch information
Henry Fontanier committed Mar 6, 2024
1 parent 040c3b3 commit e8b4748
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 17 deletions.
12 changes: 10 additions & 2 deletions connectors/src/connectors/notion/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2221,7 +2221,10 @@ async function renderPageSection({
// Prefix for depths 0 and 1, and only if children
const blockSection =
depth < 2 && adaptedBlocksByParentId[b.notionBlockId]?.length
? await renderPrefixSection(dsConfig, renderedBlock)
? await renderPrefixSection({
dataSourceConfig: dsConfig,
prefix: renderedBlock,
})
: {
prefix: null,
content: renderedBlock,
Expand Down Expand Up @@ -2356,7 +2359,12 @@ export async function upsertDatabaseStructuredDataFromCache({
);
localLogger.info("Upserting Notion Database as Document.");
const prefix = `${databaseName}\n${csvHeader}`;
const prefixSection = await renderPrefixSection(dataSourceConfig, prefix);
const prefixSection = await renderPrefixSection({
dataSourceConfig,
prefix,
maxPrefixTokens: 128,
maxPrefixChars: 1024,
});
if (!prefixSection.content) {
await upsertToDatasource({
dataSourceConfig,
Expand Down
35 changes: 20 additions & 15 deletions connectors/src/lib/data_sources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -254,32 +254,37 @@ const MAX_PREFIX_CHARS = MAX_PREFIX_TOKENS * 8;
// provided will not be augmented with `\n`, so it should include the appropriate newlines. If
// the prefix is too long (> maxPrefixTokens, which defaults to MAX_PREFIX_TOKENS), it will be
// truncated. The remainder will be returned as the content of the resulting section.
export async function renderPrefixSection(
dataSourceConfig: DataSourceConfig,
prefix: string | null
): Promise<CoreAPIDataSourceDocumentSection> {
export async function renderPrefixSection({
dataSourceConfig,
prefix,
maxPrefixTokens = MAX_PREFIX_TOKENS,
maxPrefixChars = MAX_PREFIX_CHARS,
}: {
dataSourceConfig: DataSourceConfig;
prefix: string | null;
maxPrefixTokens?: number;
maxPrefixChars?: number;
}): Promise<CoreAPIDataSourceDocumentSection> {
if (!prefix || !prefix.trim()) {
return {
prefix: null,
content: null,
sections: [],
};
}
let targetPrefix = safeSubstring(prefix, 0, MAX_PREFIX_CHARS);
let targetPrefix = safeSubstring(prefix, 0, maxPrefixChars);
let targetContent =
prefix.length > MAX_PREFIX_CHARS
? safeSubstring(prefix, MAX_PREFIX_CHARS)
: "";
prefix.length > maxPrefixChars ? safeSubstring(prefix, maxPrefixChars) : "";

const tokens = await tokenize(targetPrefix, dataSourceConfig);

targetPrefix = tokens
.slice(0, MAX_PREFIX_TOKENS)
.slice(0, maxPrefixTokens)
.map((t) => t[1])
.join("");
targetContent =
tokens
.slice(MAX_PREFIX_TOKENS)
.slice(maxPrefixTokens)
.map((t) => t[1])
.join("") + targetContent;

Expand Down Expand Up @@ -345,10 +350,10 @@ export async function renderMarkdownSection(
throw new Error("Unreachable");
}

const c = await renderPrefixSection(
dsConfig,
toMarkdown(child, { extensions: [gfmToMarkdown()] })
);
const c = await renderPrefixSection({
dataSourceConfig: dsConfig,
prefix: toMarkdown(child, { extensions: [gfmToMarkdown()] }),
});
last.content.sections.push(c);
path.push({
depth: child.depth,
Expand Down Expand Up @@ -405,7 +410,7 @@ export async function renderDocumentTitleAndContent({
} else {
title = null;
}
const c = await renderPrefixSection(dataSourceConfig, title);
const c = await renderPrefixSection({ dataSourceConfig, prefix: title });
let metaPrefix: string | null = "";
if (createdAt && isValidDate(createdAt)) {
metaPrefix += `$createdAt: ${createdAt.toISOString()}\n`;
Expand Down

0 comments on commit e8b4748

Please sign in to comment.