From c8a974f1e614bcf29cc40a171f8728f80cf80d5a Mon Sep 17 00:00:00 2001 From: filou Date: Thu, 7 Mar 2024 22:44:02 +0100 Subject: [PATCH 1/4] [Front Search] Better, stabler, cooler, stronger sort order for results Improvement over the previous version, cf test case WIP: still wondering where to put the tests --- front/lib/utils.ts | 83 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 8 deletions(-) diff --git a/front/lib/utils.ts b/front/lib/utils.ts index f5c35e85b6df..8ed566cebc16 100644 --- a/front/lib/utils.ts +++ b/front/lib/utils.ts @@ -135,8 +135,7 @@ export const objectToMarkdown = (obj: any, indent = 0) => { return markdown; }; -// Returns true if a is a subsequence of b. -export function subFilter(a: string, b: string) { +function subFilterLastIndex(a: string, b: string) { let i = 0; let j = 0; while (i < a.length && j < b.length) { @@ -145,16 +144,58 @@ export function subFilter(a: string, b: string) { } j++; } - return i === a.length; + return i === a.length ? j : -1; +} + +function subFilterFirstIndex(a: string, b: string) { + let i = a.length - 1; + let j = b.length - 1; + while (i >= 0 && j >= 0) { + if (a[i] === b[j]) { + i--; + } + j--; + } + return i === -1 ? j + 1 : -1; } +/** + * Returns true if a is a subfilter of b, i.e. all characters in a are present + * in b in the same order. 
+ */ +export function subFilter(a: string, b: string) { + return subFilterLastIndex(a, b) > -1; +} + +/** + * Compares two strings for fuzzy sorting against a query First sort by spread + * of subfilter, then by first index of subfilter, then length, then by + * lexicographic order + */ export function compareForFuzzySort(query: string, a: string, b: string) { const distanceToQuery = (s: string) => - s.length - query.length + s.indexOf(query.charAt(0)); - if (distanceToQuery(a) === distanceToQuery(b)) { - return a.localeCompare(b); + subFilterLastIndex(query, s) - subFilterFirstIndex(query, s); + const distanceA = distanceToQuery(a); + if (distanceA === 0) { + return 1; + } + const distanceB = distanceToQuery(b); + if (distanceB === 0) { + return -1; } - return distanceToQuery(a) - distanceToQuery(b); + + if (distanceA !== distanceB) { + return distanceA - distanceB; + } + const firstCharA = a.indexOf(query.charAt(0)); + const firstCharB = b.indexOf(query.charAt(0)); + if (firstCharA !== firstCharB) { + return firstCharA - firstCharB; + } + if (a.length !== b.length) { + return a.length - b.length; + } + return a.localeCompare(b); } export function filterAndSortAgents( @@ -167,7 +208,6 @@ export function filterAndSortAgents( subFilter(lowerCaseSearchText, a.name.toLowerCase()) ); - // Sort by position of the subFilter in the name (position of the first character matching). 
if (searchText.length > 0) { filtered.sort((a, b) => compareForFuzzySort(lowerCaseSearchText, a.name, b.name) @@ -176,3 +216,30 @@ export function filterAndSortAgents( return filtered; } + +function testCompareForFuzzySort() { + // a is always closer to the query than b + const data = [ + { query: "eng", a: "eng", b: "ContentMarketing" }, + { query: "sql", a: "sqlGod", b: "sqlCoreGod" }, + { query: "sql", a: "sql", b: "sqlGod" }, + { query: "sql", a: "sql", b: "SEOQualityRater" }, + { query: "gp", a: "gpt-4", b: "GabHelp" }, + { query: "gp", a: "gpt-4", b: "gemni-pro" }, + { query: "start", a: "robotstart", b: "strongrt" }, + { query: "mygod", a: "ohmygodbot", b: "moatmode" }, + { query: "test", a: "test", b: "testlong" }, + { query: "test", a: "testlonger", b: "longtest" }, + ]; + console.log( + "Testing compareForFuzzySort, expected first then expected second" + ); + for (const d of data) { + console.log( + compareForFuzzySort(d.query, d.a, d.b) < 0 ? "PASS" : "FAIL", + d + ); + } +} + +testCompareForFuzzySort(); From e09a1df66b25ea3df2a4af3322ce5d25e451a0fd Mon Sep 17 00:00:00 2001 From: filou Date: Thu, 7 Mar 2024 23:02:36 +0100 Subject: [PATCH 2/4] use subFilterFirstIndex instead of indexOf(charAt) --- front/lib/utils.ts | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/front/lib/utils.ts b/front/lib/utils.ts index 8ed566cebc16..3ffd568eac4e 100644 --- a/front/lib/utils.ts +++ b/front/lib/utils.ts @@ -173,25 +173,28 @@ export function subFilter(a: string, b: string) { * lexicographic order */ export function compareForFuzzySort(query: string, a: string, b: string) { - const distanceToQuery = (s: string) => - subFilterLastIndex(query, s) - subFilterFirstIndex(query, s); - const distanceA = distanceToQuery(a); - if (distanceA === 0) { + const subFilterFirstIndexA = subFilterFirstIndex(query, a); + if (subFilterFirstIndexA === -1) { return 1; } - const distanceB = distanceToQuery(b); - if (distanceB === 0) { + + const 
subFilterFirstIndexB = subFilterFirstIndex(query, b); + if (subFilterFirstIndexB === -1) { return -1; } + const subFilterLastIndexA = subFilterLastIndex(query, a); + const subFilterLastIndexB = subFilterLastIndex(query, b); + const distanceA = subFilterLastIndexA - subFilterFirstIndexA; + const distanceB = subFilterLastIndexB - subFilterFirstIndexB; if (distanceA !== distanceB) { return distanceA - distanceB; } - const firstCharA = a.indexOf(query.charAt(0)); - const firstCharB = b.indexOf(query.charAt(0)); - if (firstCharA !== firstCharB) { - return firstCharA - firstCharB; + + if (subFilterFirstIndexA !== subFilterFirstIndexB) { + return subFilterFirstIndexA - subFilterFirstIndexB; } + if (a.length !== b.length) { return a.length - b.length; } From 6b6f5ab8af8f32ad9dd45731d1dc4d8c012d692f Mon Sep 17 00:00:00 2001 From: filou Date: Fri, 8 Mar 2024 16:37:30 +0100 Subject: [PATCH 3/4] doc --- front/lib/utils.ts | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/front/lib/utils.ts b/front/lib/utils.ts index 3ffd568eac4e..0d32c6dc9283 100644 --- a/front/lib/utils.ts +++ b/front/lib/utils.ts @@ -135,6 +135,17 @@ export const objectToMarkdown = (obj: any, indent = 0) => { return markdown; }; +/** + * Checks if a is a subfilter of b, i.e. all characters in a are present in b in + * the same order, and returns the smallest index just past the last character + * of a in b, or -1 if a is not a subfilter of b. + * + * Used in conjunction with subFilterFirstIndex to compare how much a is 'spread + * out' in b. e.g. + * - 'god' and 'sqlGod', spread is 3 (from the index of g to just past d) + * - 'gp4' and 'gpt-4', spread is 5 + * - 'gp4' and 'gemni-pro4', spread is 10 + */ function subFilterLastIndex(a: string, b: string) { let i = 0; let j = 0; @@ -147,6 +158,15 @@ function subFilterLastIndex(a: string, b: string) { return i === a.length ? j : -1; } +/** + * Checks if a is a subfilter of b, i.e. 
all characters in a are present in b in + * the same order, and returns the biggest index of the first character of a in b. + * Used in conjunction with subFilterLastIndex to compare how much a is 'spread + * out' in b. e.g. + * - 'god' and 'sqlGod', spread is 3 (from the index of g to just past d) + * - 'gp4' and 'gpt-4', spread is 5 + * - 'gp4' and 'gemni-pro4', spread is 10 + */ function subFilterFirstIndex(a: string, b: string) { let i = a.length - 1; let j = b.length - 1; From ec1fcb51fe3f291ff5388a76e328b67e2b490d0b Mon Sep 17 00:00:00 2001 From: filou Date: Fri, 8 Mar 2024 16:48:00 +0100 Subject: [PATCH 4/4] moved tests --- front/lib/utils.ts | 27 --------------------------- front/tests/lib/utils.test.ts | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 27 deletions(-) create mode 100644 front/tests/lib/utils.test.ts diff --git a/front/lib/utils.ts b/front/lib/utils.ts index 0d32c6dc9283..fc6d53d016dd 100644 --- a/front/lib/utils.ts +++ b/front/lib/utils.ts @@ -239,30 +239,3 @@ export function filterAndSortAgents( return filtered; } - -function testCompareForFuzzySort() { - // a is always closer to the query than b - const data = [ - { query: "eng", a: "eng", b: "ContentMarketing" }, - { query: "sql", a: "sqlGod", b: "sqlCoreGod" }, - { query: "sql", a: "sql", b: "sqlGod" }, - { query: "sql", a: "sql", b: "SEOQualityRater" }, - { query: "gp", a: "gpt-4", b: "GabHelp" }, - { query: "gp", a: "gpt-4", b: "gemni-pro" }, - { query: "start", a: "robotstart", b: "strongrt" }, - { query: "mygod", a: "ohmygodbot", b: "moatmode" }, - { query: "test", a: "test", b: "testlong" }, - { query: "test", a: "testlonger", b: "longtest" }, - ]; - console.log( - "Testing compareForFuzzySort, expected first then expected second" - ); - for (const d of data) { - console.log( - compareForFuzzySort(d.query, d.a, d.b) < 0 ? 
"PASS" : "FAIL", - d - ); - } -} - -testCompareForFuzzySort(); diff --git a/front/tests/lib/utils.test.ts b/front/tests/lib/utils.test.ts new file mode 100644 index 000000000000..11352e5023b6 --- /dev/null +++ b/front/tests/lib/utils.test.ts @@ -0,0 +1,20 @@ +import { compareForFuzzySort } from "@app/lib/utils"; + +test("compareForFuzzySort should correctly compare strings", () => { + const data = [ + { query: "eng", a: "eng", b: "ContentMarketing" }, + { query: "sql", a: "sqlGod", b: "sqlCoreGod" }, + { query: "sql", a: "sql", b: "sqlGod" }, + { query: "sql", a: "sql", b: "SEOQualityRater" }, + { query: "gp", a: "gpt-4", b: "GabHelp" }, + { query: "gp", a: "gpt-4", b: "gemni-pro" }, + { query: "start", a: "robotstart", b: "strongrt" }, + { query: "mygod", a: "ohmygodbot", b: "moatmode" }, + { query: "test", a: "test", b: "testlong" }, + { query: "test", a: "testlonger", b: "longtest" }, + ]; + + for (const d of data) { + expect(compareForFuzzySort(d.query, d.a, d.b)).toBeLessThan(0); + } +});