From 342aa1ca530a96e327a2f0c842bc572964a432b8 Mon Sep 17 00:00:00 2001 From: Logan Yang Date: Wed, 8 Jan 2025 15:23:56 -0800 Subject: [PATCH 1/4] Add truncation logic for massive context in plus mode --- src/LLMProviders/chainRunner.ts | 34 +++++++++++++++++++++++++++------ src/constants.ts | 1 + src/tools/SearchTools.ts | 7 ++++++- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/LLMProviders/chainRunner.ts b/src/LLMProviders/chainRunner.ts index 18e17cf2..a3719cf4 100644 --- a/src/LLMProviders/chainRunner.ts +++ b/src/LLMProviders/chainRunner.ts @@ -1,4 +1,11 @@ -import { ABORT_REASON, AI_SENDER, EMPTY_INDEX_ERROR_MESSAGE, LOADING_MESSAGES } from "@/constants"; +import { + ABORT_REASON, + AI_SENDER, + EMPTY_INDEX_ERROR_MESSAGE, + LOADING_MESSAGES, + MAX_CHARS_FOR_LOCAL_SEARCH_CONTEXT, +} from "@/constants"; +import { BrevilabsClient } from "@/LLMProviders/brevilabsClient"; import { getSystemPrompt } from "@/settings/model"; import { ChatMessage } from "@/sharedState"; import { ToolManager } from "@/tools/toolManager"; @@ -11,7 +18,6 @@ import { import { Notice } from "obsidian"; import ChainManager from "./chainManager"; import { COPILOT_TOOL_NAMES, IntentAnalyzer } from "./intentAnalyzer"; -import { BrevilabsClient } from "@/LLMProviders/brevilabsClient"; export interface ChainRunner { run( @@ -421,7 +427,7 @@ class CopilotPlusChainRunner extends BaseChainRunner { if (debug) console.log("==== Step 4: Preparing context ===="); const timeExpression = this.getTimeExpression(toolCalls); - const context = this.formatLocalSearchResult(documents, timeExpression); + const context = this.prepareLocalSearchResult(documents, timeExpression); const currentTimeOutputs = toolOutputs.filter((output) => output.tool === "getCurrentTime"); const enhancedQuestion = this.prepareEnhancedUserMessage( @@ -593,11 +599,27 @@ class CopilotPlusChainRunner extends BaseChainRunner { return timeRangeCall ? timeRangeCall.args.timeExpression : ""; } - private formatLocalSearchResult(documents: any[], timeExpression: string): string { - const formattedDocs = documents - .filter((doc) => doc.includeInContext) + private prepareLocalSearchResult(documents: any[], timeExpression: string): string { + // First filter documents with includeInContext + const includedDocs = documents.filter((doc) => doc.includeInContext); + + // Calculate total content length + const totalLength = includedDocs.reduce((sum, doc) => sum + doc.content.length, 0); + + // If total length exceeds threshold, calculate truncation ratio + let truncatedDocs = includedDocs; + if (totalLength > MAX_CHARS_FOR_LOCAL_SEARCH_CONTEXT) { + const truncationRatio = MAX_CHARS_FOR_LOCAL_SEARCH_CONTEXT / totalLength; + truncatedDocs = includedDocs.map((doc) => ({ + ...doc, + content: doc.content.slice(0, Math.floor(doc.content.length * truncationRatio)), + })); + } + + const formattedDocs = truncatedDocs .map((doc: any) => `Note in Vault: ${doc.content}`) .join("\n\n"); + return timeExpression ? `Local Search Result for ${timeExpression}:\n${formattedDocs}` : `Local Search Result:\n${formattedDocs}`; diff --git a/src/constants.ts b/src/constants.ts index 54640e53..f041024e 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -23,6 +23,7 @@ export const CHUNK_SIZE = 4000; export const CONTEXT_SCORE_THRESHOLD = 0.4; export const TEXT_WEIGHT = 0.4; export const PLUS_MODE_DEFAULT_SOURCE_CHUNKS = 15; +export const MAX_CHARS_FOR_LOCAL_SEARCH_CONTEXT = 512000; export const LOADING_MESSAGES = { DEFAULT: "", READING_FILES: "Reading files", diff --git a/src/tools/SearchTools.ts b/src/tools/SearchTools.ts index 7c4d03f8..357628b2 100644 --- a/src/tools/SearchTools.ts +++ b/src/tools/SearchTools.ts @@ -33,9 +33,14 @@ const localSearchTool = tool( ? PLUS_MODE_DEFAULT_SOURCE_CHUNKS : getSettings().maxSourceChunks; + if (getSettings().debug) { + console.log("returnAll:", returnAll); + console.log("maxSourceChunks:", maxSourceChunks); + } + const hybridRetriever = new HybridRetriever({ minSimilarityScore: returnAll ? 0.0 : 0.1, - maxK: returnAll ? 100 : maxSourceChunks, + maxK: returnAll ? 1000 : maxSourceChunks, salientTerms, timeRange: timeRange ? { From 1708cfb645b1f1c271257b8dd33d4d52797f9e84 Mon Sep 17 00:00:00 2001 From: Logan Yang Date: Wed, 8 Jan 2025 18:29:56 -0800 Subject: [PATCH 2/4] Support more year time expressions --- src/tools/TimeTools.ts | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/tools/TimeTools.ts b/src/tools/TimeTools.ts index 5f636732..46a30f20 100644 --- a/src/tools/TimeTools.ts +++ b/src/tools/TimeTools.ts @@ -211,6 +211,26 @@ function getTimeRangeMs(timeExpression: string): }; } + // Check if input matches various year formats + const yearMatch = normalizedInput.match( + /^(?:(?:the\s+)?(?:year|yr)(?:\s+(?:of|in))?\s+)?(\d{4})$/i + ); + if (yearMatch) { + const year = parseInt(yearMatch[1]); + start = DateTime.fromObject({ year, month: 1, day: 1 }); + end = DateTime.fromObject({ year, month: 12, day: 31 }); + + if (start > now) { + start = start.minus({ years: 1 }); + end = end.minus({ years: 1 }); + } + + return { + startTime: convertToTimeInfo(start), + endTime: convertToTimeInfo(end), + }; + } + // Use Chrono.js for parsing dates timeExpression = timeExpression.replace("@vault", ""); const parsedDates = chrono.parse(timeExpression, now.toJSDate(), { forwardDate: false }); From b9276e13aa925d75f3f10b4575cfde21fe5d5085 Mon Sep 17 00:00:00 2001 From: Logan Yang Date: Wed, 8 Jan 2025 19:44:44 -0800 Subject: [PATCH 3/4] Update indexing batchsize to 16 and batch error message --- src/search/indexOperations.ts | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/search/indexOperations.ts b/src/search/indexOperations.ts index 8f38a869..1ca7a116 100644 --- a/src/search/indexOperations.ts +++ b/src/search/indexOperations.ts @@ -9,7 +9,7 @@ import { App, Notice, TFile } from "obsidian"; import { DBOperations } from "./dbOperations"; import { extractAppIgnoreSettings, getFilePathsForQA } from "./searchUtils"; -const EMBEDDING_BATCH_SIZE = 64; +const EMBEDDING_BATCH_SIZE = 16; const CHECKPOINT_INTERVAL = 8 * EMBEDDING_BATCH_SIZE; export interface IndexingState { @@ -132,7 +132,20 @@ export class IndexOperations { console.log("Copilot index checkpoint save completed."); } } catch (err) { - this.handleIndexingError(err, batch[0].fileInfo.path, errors, rateLimitNoticeShown); + console.error("Batch processing error:", { + error: err, + batchSize: batch?.length || 0, + firstChunk: batch?.[0] + ? { + path: batch[0].fileInfo?.path, + contentLength: batch[0].content?.length, + hasFileInfo: !!batch[0].fileInfo, + } + : "No chunks in batch", + errorType: err?.constructor?.name, + errorMessage: err?.message, + }); + this.handleIndexingError(err, batch?.[0]?.fileInfo?.path, errors, rateLimitNoticeShown); if (this.isRateLimitError(err)) { rateLimitNoticeShown = true; break; @@ -389,14 +402,14 @@ export class IndexOperations { private handleIndexingError( err: any, - file: TFile, + filePath: string, errors: string[], rateLimitNoticeShown: boolean ): void { - console.error(`Error indexing file ${file.path}:`, err); - errors.push(file.path); + console.error(`Error indexing file ${filePath || "unknown"}:`, err); + errors.push(filePath || "unknown"); if (!rateLimitNoticeShown) { - new Notice(`Error indexing file ${file.path}. Check console for details.`); + new Notice(`Error indexing file ${filePath || "unknown"}. Check console for details.`); } } From 7f8db8f3768a1b61a5b188c0f82d45fd99ccfcb0 Mon Sep 17 00:00:00 2001 From: Logan Yang Date: Wed, 8 Jan 2025 20:04:18 -0800 Subject: [PATCH 4/4] Support more time expressions --- src/tools/SearchTools.ts | 1 - src/tools/TimeTools.ts | 289 ++++++++++++++++++--------------------- 2 files changed, 136 insertions(+), 154 deletions(-) diff --git a/src/tools/SearchTools.ts b/src/tools/SearchTools.ts index 357628b2..b319c40a 100644 --- a/src/tools/SearchTools.ts +++ b/src/tools/SearchTools.ts @@ -35,7 +35,6 @@ const localSearchTool = tool( if (getSettings().debug) { console.log("returnAll:", returnAll); - console.log("maxSourceChunks:", maxSourceChunks); } const hybridRetriever = new HybridRetriever({ diff --git a/src/tools/TimeTools.ts b/src/tools/TimeTools.ts index 46a30f20..d9de981a 100644 --- a/src/tools/TimeTools.ts +++ b/src/tools/TimeTools.ts @@ -67,196 +67,179 @@ const monthNames = { december: 12, } as const; -function getTimeRangeMs(timeExpression: string): - | { - startTime: TimeInfo; - endTime: TimeInfo; - } - | undefined { - const now = DateTime.now(); - let start: DateTime; - let end: DateTime; +/** + * Handles relative time range patterns like: + * - "last 3 days", "past 3 days" + * - "last 2 weeks", "past 2 weeks" + * - "last 6 months", "previous 6 months" + * - "last 2 years", "prior 2 years" + */ +function handleRelativeTimeRange(input: string, now: DateTime) { + // Match numeric patterns with various past-tense prefixes + const relativeMatch = input.match( + /^(last|past|previous|prior)\s+(\d+)\s+(days?|weeks?|months?|years?)$/i + ); - const normalizedInput = timeExpression.toLowerCase().replace("@vault", "").trim(); + if (!relativeMatch) return undefined; + + const [, , amountStr, unit] = relativeMatch; + const amount = parseInt(amountStr); + + if (amount <= 0) { + return undefined; + } + + const unitSingular = unit.replace(/s$/, "") as "day" | "week" | "month" | "year"; - // Handle special cases first - switch (normalizedInput) { + const end = now.startOf("day"); + const start = end.minus({ [unitSingular + "s"]: amount }); + return { start, end }; +} + +/** + * Handles special time ranges like: + * - "yesterday" + * - "last week", "this week", "next week" + * - "last month", "this month", "next month" + * - "last year", "this year", "next year" + */ +function handleSpecialTimeRanges(input: string, now: DateTime) { + switch (input) { case "yesterday": - start = now.minus({ days: 1 }).startOf("day"); - end = now.minus({ days: 1 }).endOf("day"); return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), + start: now.minus({ days: 1 }).startOf("day"), + end: now.minus({ days: 1 }).endOf("day"), }; case "last week": - start = now.minus({ weeks: 1 }).startOf("week"); - end = now.minus({ weeks: 1 }).endOf("week"); - return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), - }; - case "this week": - start = now.startOf("week"); - end = now.endOf("week"); - return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), - }; - case "next week": - start = now.plus({ weeks: 1 }).startOf("week"); - end = now.plus({ weeks: 1 }).endOf("week"); - return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), - }; - case "last month": - start = now.minus({ months: 1 }).startOf("month"); - end = now.minus({ months: 1 }).endOf("month"); return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), - }; - case "this month": - start = now.startOf("month"); - end = now.endOf("month"); - return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), - }; - case "next month": - start = now.plus({ months: 1 }).startOf("month"); - end = now.plus({ months: 1 }).endOf("month"); - return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), - }; - case "last year": - start = now.minus({ years: 1 }).startOf("year"); - end = now.minus({ years: 1 }).endOf("year"); - return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), - }; - case "this year": - start = now.startOf("year"); - end = now.endOf("year"); - return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), - }; - case "next year": - start = now.plus({ years: 1 }).startOf("year"); - end = now.plus({ years: 1 }).endOf("year"); - return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), + start: now.minus({ weeks: 1 }).startOf("week"), + end: now.minus({ weeks: 1 }).endOf("week"), }; + // ... other cases } + return undefined; +} - // Check for "week of" pattern first - const weekOfMatch = normalizedInput.match(/(?:the\s+)?week\s+of\s+(.+)/i); - if (weekOfMatch) { - const dateStr = weekOfMatch[1]; - const parsedDates = chrono.parse(dateStr, now.toJSDate(), { forwardDate: false }); - if (parsedDates.length > 0) { - start = DateTime.fromJSDate(parsedDates[0].start.date()).startOf("week"); - end = start.endOf("week"); - - if (start > now) { - start = start.minus({ years: 1 }); - end = end.minus({ years: 1 }); - } - - return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), - }; - } +/** + * Handles "week of" pattern like: + * - "week of July 1st" + * - "week of 2023-07-01" + * - "the week of last Monday" + */ +function handleWeekOf(input: string, now: DateTime) { + const weekOfMatch = input.match(/(?:the\s+)?week\s+of\s+(.+)/i); + if (!weekOfMatch) return undefined; + + const dateStr = weekOfMatch[1]; + const parsedDates = chrono.parse(dateStr, now.toJSDate(), { forwardDate: false }); + if (parsedDates.length === 0) return undefined; + + let start = DateTime.fromJSDate(parsedDates[0].start.date()).startOf("week"); + let end = start.endOf("week"); + + if (start > now) { + start = start.minus({ years: 1 }); + end = end.minus({ years: 1 }); } - // Check if input is just a month name - const monthMatch = normalizedInput.match( + return { start, end }; +} + +/** + * Handles single month names like: + * - "january", "jan" + * - "december", "dec" + */ +function handleMonthName(input: string, now: DateTime) { + const monthMatch = input.match( /^(jan|january|feb|february|mar|march|apr|april|may|jun|june|jul|july|aug|august|sep|september|oct|october|nov|november|dec|december)$/i ); - if (monthMatch) { - const monthNum = monthNames[monthMatch[1] as keyof typeof monthNames]; - let year = now.year; + if (!monthMatch) return undefined; - // If the month is in the future, use last year - if (monthNum > now.month) { - year--; - } + const monthNum = monthNames[monthMatch[1] as keyof typeof monthNames]; + let year = now.year; - // Create start and end dates for the entire month - start = DateTime.fromObject({ - year, - month: monthNum, - day: 1, - }); + if (monthNum > now.month) { + year--; + } - end = start.endOf("month"); + let start = DateTime.fromObject({ + year, + month: monthNum, + day: 1, + }); + let end = start.endOf("month"); - if (start > now) { - start = start.minus({ years: 1 }); - end = end.minus({ years: 1 }); - } + if (start > now) { + start = start.minus({ years: 1 }); + end = end.minus({ years: 1 }); + } - if (start > end) { - [start, end] = [end, start]; - } + return { start, end }; +} - return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), - }; +/** + * Handles year patterns like: + * - "2023" + * - "year 2023" + * - "the year of 2023" + */ +function handleYear(input: string, now: DateTime) { + const yearMatch = input.match(/^(?:(?:the\s+)?(?:year|yr)(?:\s+(?:of|in))?\s+)?(\d{4})$/i); + if (!yearMatch) return undefined; + + const year = parseInt(yearMatch[1]); + let start = DateTime.fromObject({ year, month: 1, day: 1 }); + let end = DateTime.fromObject({ year, month: 12, day: 31 }); + + if (start > now) { + start = start.minus({ years: 1 }); + end = end.minus({ years: 1 }); } - // Check if input matches various year formats - const yearMatch = normalizedInput.match( - /^(?:(?:the\s+)?(?:year|yr)(?:\s+(?:of|in))?\s+)?(\d{4})$/i - ); - if (yearMatch) { - const year = parseInt(yearMatch[1]); - start = DateTime.fromObject({ year, month: 1, day: 1 }); - end = DateTime.fromObject({ year, month: 12, day: 31 }); + return { start, end }; +} - if (start > now) { - start = start.minus({ years: 1 }); - end = end.minus({ years: 1 }); - } +function getTimeRangeMs(timeExpression: string) { + const now = DateTime.now(); + const normalizedInput = timeExpression.toLowerCase().replace("@vault", "").trim(); + // Try each parser in sequence + const result = + handleRelativeTimeRange(normalizedInput, now) || + handleSpecialTimeRanges(normalizedInput, now) || + handleWeekOf(normalizedInput, now) || + handleMonthName(normalizedInput, now) || + handleYear(normalizedInput, now); + + if (result) { return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), + startTime: convertToTimeInfo(result.start), + endTime: convertToTimeInfo(result.end), }; } - // Use Chrono.js for parsing dates - timeExpression = timeExpression.replace("@vault", ""); + // Fallback to chrono parser for other date formats const parsedDates = chrono.parse(timeExpression, now.toJSDate(), { forwardDate: false }); if (parsedDates.length > 0) { - // Convert to DateTime while preserving the local timezone - start = DateTime.fromJSDate(parsedDates[0].start.date()).startOf("day"); - - // If no end date is specified, use the same day as end date - end = parsedDates[0].end + const start = DateTime.fromJSDate(parsedDates[0].start.date()).startOf("day"); + const end = parsedDates[0].end ? DateTime.fromJSDate(parsedDates[0].end.date()).endOf("day") : start.endOf("day"); - // If the parsed date is in the future, adjust it to the previous occurrence if (start > now) { - start = start.minus({ years: 1 }); - end = end.minus({ years: 1 }); + start.minus({ years: 1 }); + end.minus({ years: 1 }); } - } else { - console.warn(`Unable to parse time expression: ${timeExpression}`); - return; + + return { + startTime: convertToTimeInfo(start), + endTime: convertToTimeInfo(end), + }; } - return { - startTime: convertToTimeInfo(start), - endTime: convertToTimeInfo(end), - }; + console.warn(`Unable to parse time expression: ${timeExpression}`); + return undefined; } function convertToTimeInfo(dateTime: DateTime): TimeInfo {