From b433d287069cfaa48a5f2c05fa5a73eff6711e6b Mon Sep 17 00:00:00 2001 From: sukoom pornsuksiri Date: Thu, 14 Nov 2024 11:45:36 +0700 Subject: [PATCH 1/3] fix Thai search highlight --- app/utils/markdown/index.test.ts | 5 +++++ app/utils/markdown/index.ts | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/app/utils/markdown/index.test.ts b/app/utils/markdown/index.test.ts index 63f2868516..98b5534bf7 100644 --- a/app/utils/markdown/index.test.ts +++ b/app/utils/markdown/index.test.ts @@ -184,6 +184,11 @@ describe('Utility functions', () => { const result = convertSearchTermToRegex('你好'); expect(result.pattern).toEqual(/()(你好)/gi); }); + it('should create regex for Thai characters', () => { + const result = convertSearchTermToRegex('สวัสดี'); + expect(result.pattern).toEqual(/()(สวัสดี)/gi); + }); + it('should create regex for wildcard at the end', () => { const result = convertSearchTermToRegex('hello*'); diff --git a/app/utils/markdown/index.ts b/app/utils/markdown/index.ts index 95824ffcd2..b4048862e3 100644 --- a/app/utils/markdown/index.ts +++ b/app/utils/markdown/index.ts @@ -20,7 +20,7 @@ type LanguageObject = { // pattern to detect the existence of a Chinese, Japanese, or Korean character in a string // http://stackoverflow.com/questions/15033196/using-javascript-to-check-whether-a-string-contains-japanese-characters-includi -const cjkPattern = /[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uff9f\u4e00-\u9faf\u3400-\u4dbf\uac00-\ud7a3]/; +const cjkPattern = /[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uff9f\u4e00-\u9faf\u3400-\u4dbf\uac00-\ud7a3\u0e00-\u0e7f]/; const puncStart = /^[^\p{L}\d\s#]+/u; const puncEnd = /[^\p{L}\d\s]+$/u; @@ -347,7 +347,7 @@ export function convertSearchTermToRegex(term: string): SearchPattern { let pattern; if (cjkPattern.test(term)) { - // term contains Chinese, Japanese, or Korean characters so don't mark word boundaries + // term contains Chinese, Japanese, Korean, or Thai characters so don't mark word 
boundaries pattern = '()(' + escapeRegex(term.replace(/\*/g, '')) + ')'; } else if ((/[^\s][*]$/).test(term)) { pattern = '\\b()(' + escapeRegex(term.substring(0, term.length - 1)) + ')'; From d132f813ceaee5c7730e2d16bcb439d370b969f3 Mon Sep 17 00:00:00 2001 From: Pattara Kiatisevi Date: Thu, 14 Nov 2024 12:25:40 +0700 Subject: [PATCH 2/3] Update index.test.ts just newline adjustments --- app/utils/markdown/index.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/utils/markdown/index.test.ts b/app/utils/markdown/index.test.ts index 98b5534bf7..78a8877cb0 100644 --- a/app/utils/markdown/index.test.ts +++ b/app/utils/markdown/index.test.ts @@ -184,12 +184,12 @@ describe('Utility functions', () => { const result = convertSearchTermToRegex('你好'); expect(result.pattern).toEqual(/()(你好)/gi); }); + it('should create regex for Thai characters', () => { const result = convertSearchTermToRegex('สวัสดี'); expect(result.pattern).toEqual(/()(สวัสดี)/gi); }); - it('should create regex for wildcard at the end', () => { const result = convertSearchTermToRegex('hello*'); expect(result.pattern).toEqual(/\b()(hello)/gi); From 9d0b524722bd23d386e548f953fd2c90c111cabc Mon Sep 17 00:00:00 2001 From: Pattara Kiatisevi Date: Thu, 14 Nov 2024 22:19:03 +0700 Subject: [PATCH 3/3] Update index.ts updated the comment to include Thai language --- app/utils/markdown/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/utils/markdown/index.ts b/app/utils/markdown/index.ts index b4048862e3..ab81985b33 100644 --- a/app/utils/markdown/index.ts +++ b/app/utils/markdown/index.ts @@ -18,7 +18,7 @@ type LanguageObject = { }; } -// pattern to detect the existence of a Chinese, Japanese, or Korean character in a string +// pattern to detect the existence of a Chinese, Japanese, Korean, or Thai character in a string // http://stackoverflow.com/questions/15033196/using-javascript-to-check-whether-a-string-contains-japanese-characters-includi const 
cjkPattern = /[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uff9f\u4e00-\u9faf\u3400-\u4dbf\uac00-\ud7a3\u0e00-\u0e7f]/;