From 5ac0b238798ec44d37f40bd9c072bb69c6249d2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Noss?= <74484200+lnoss@users.noreply.github.com> Date: Fri, 26 Jan 2024 09:55:56 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Add=20digits=20in=20searched=20k?= =?UTF-8?q?eywords?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also preserve combining characters, i.e. characters that are intended to modify other characters. Close Bug avec les titres contenant des guillemets anglais (" ") #25 --- ophirofox/content_scripts/europresse_search.js | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ophirofox/content_scripts/europresse_search.js b/ophirofox/content_scripts/europresse_search.js index 639d36a0..e45904d0 100644 --- a/ophirofox/content_scripts/europresse_search.js +++ b/ophirofox/content_scripts/europresse_search.js @@ -21,9 +21,17 @@ async function onLoad() { const search_terms = await consumeSearchTerms(); if (!search_terms) return; const stopwords = new Set(['d', 'l', 'et', 'sans']); + + /* + L = { Lu , Ll , Lt , Lm , Lo } + M = { Mn , Mc , Me } + Nd: a decimal digit + Unicode specification: https://www.unicode.org/reports/tr44/#General_Category_Values + Categories browser: https://www.compart.com/fr/unicode/category + */ const keywords = search_terms .replace(/œ/g, 'oe') - .split(/[^\p{L}]+/u) + .split(/[^\p{L}\p{M}\p{Nd}]+/u) .filter(w => !stopwords.has(w)) .join(' '); const keyword_field = document.getElementById("Keywords");